def start_object(self, attrs):
    """Handle an opening <object> tag by building the ActiveX object it names.

    attrs -- list of (name, value) attribute pairs; it is first passed
             through self.object_fix().

    If no 'classid' attribute produces a usable ActiveXObject, the tag is
    logged at debug level, self.ignoreObj is set to True and nothing is
    added to the DOM.  Otherwise the object is registered under each of its
    'id'/'name' values, receives the tag's attributes, is appended to the
    element on top of self.DOM_stack and is then pushed onto that stack.
    Returns None.
    """
    attrs = self.object_fix(attrs)

    # An ActiveX object may be initialized by classid or by classname.  When
    # it is created from an 'object' tag the classid is used.  See the class
    # definition in 'ActiveX/ActiveX.py' for more.
    activex = None
    for attr_name, attr_value in attrs:
        if attr_name != 'classid':
            continue
        try:
            activex = ActiveXObject(attr_value, 'id')
        except UserWarning:
            # Construction was refused; keep whatever we had before.
            pass

    if not activex:
        config.VERBOSE(
            config.VERBOSE_DEBUG,
            "[DEBUG] PageParser.py: Ignoring start_object attrs: " + str(attrs))
        self.ignoreObj = True
        return

    # NOTE(review): the nesting of the two __setattr__ calls below was
    # recovered from a whitespace-collapsed listing -- confirm that the
    # '__fl' registration runs for every id/name and that the attribute
    # copy runs for every attribute.
    for attr_name, attr_value in attrs:
        if attr_name in ('id', 'name'):
            window_state = self.__dict__['__window'].__dict__
            context = window_state['__cx']
            # Only add a global when the context does not already know
            # the name.
            if context.execute('typeof ' + attr_value + ' == "undefined"'):
                context.add_global(attr_value, activex)
            # Also expose the object on the last entry of the '__fl' list.
            window_state['__fl'][-1].__setattr__(attr_value, activex)
        activex.__setattr__(dataetc.attrTrans(attr_name, 'object'), attr_value)

    dom_stack = self.DOM_stack
    dom_stack[-1].appendChild(activex)
    dom_stack.append(activex)
def start_object(self, attrs):
    """Handle an opening <object> tag by building the ActiveX object it names.

    attrs -- list of (name, value) attribute pairs; it is first passed
             through self.object_fix().

    If no 'classid' attribute produces a usable ActiveXObject, the tag is
    logged at debug level, self.ignoreObj is set to True and nothing is
    added to the DOM.  Otherwise the object is registered under each of its
    'id'/'name' values, receives the tag's attributes, is appended to the
    element on top of self.DOM_stack and is then pushed onto that stack.
    Returns None.
    """
    attrs = self.object_fix(attrs)

    # An ActiveX object may be initialized by classid or by classname.  When
    # it is created from an 'object' tag the classid is used.  See the class
    # definition in 'ActiveX/ActiveX.py' for more.
    activex = None
    for attr_name, attr_value in attrs:
        if attr_name != 'classid':
            continue
        try:
            activex = ActiveXObject(attr_value, 'id')
        except UserWarning:
            # Construction was refused; keep whatever we had before.
            pass

    if not activex:
        config.VERBOSE(
            config.VERBOSE_DEBUG,
            "[DEBUG] PageParser.py: Ignoring start_object attrs: " + str(attrs))
        self.ignoreObj = True
        return

    # NOTE(review): the nesting of the two __setattr__ calls below was
    # recovered from a whitespace-collapsed listing -- confirm that the
    # '__fl' registration runs for every id/name and that the attribute
    # copy runs for every attribute.
    for attr_name, attr_value in attrs:
        if attr_name in ('id', 'name'):
            window_state = self.__dict__['__window'].__dict__
            context = window_state['__cx']
            # Only add a global when the context does not already know
            # the name.
            if context.execute('typeof ' + attr_value + ' == "undefined"'):
                context.add_global(attr_value, activex)
            # Also expose the object on the last entry of the '__fl' list.
            window_state['__fl'][-1].__setattr__(attr_value, activex)
        activex.__setattr__(dataetc.attrTrans(attr_name, 'object'), attr_value)

    dom_stack = self.DOM_stack
    dom_stack[-1].appendChild(activex)
    dom_stack.append(activex)
def unknown_starttag(self, tag, attrs):
    """Create a DOMObject for an opening tag and push it onto the DOM stack.

    tag   -- tag name as reported by the parser.
    attrs -- list of (name, value) attribute pairs.

    While self.in_Script is set, the raw start-tag text is forwarded to
    handle_data() and nothing else happens.  Once self.endearly is set the
    tag is only logged.  Otherwise a DOMObject is built, given the tag's
    attributes, annotated with the 'begin'/'end' offsets of its content in
    self.html when they can be located, appended to the element on top of
    self.DOM_stack and then pushed onto that stack.  Returns None.
    """
    if self.in_Script:
        # Inside a script the tag is passed on as data rather than parsed.
        self.handle_data(self.get_starttag_text())
        return

    config.VERBOSE(config.VERBOSE_DEBUG,
                   "[DEBUG] [PageParser.py] Tag: " + tag)
    if self.endearly:
        return

    window = self.__dict__['__window']
    domobj = DOMObject(window, tag, self)

    # Sometimes the name used in the tag is not the real attribute name, so
    # a transform is needed.  Note that this is the IE way; in Firefox the
    # transform is done in DOMObject.setAttribute().
    for name, value in attrs:
        domobj.setAttribute(dataetc.attrTrans(name, tag), value)
        if dataetc.isevent(name.lower(), tag):
            self.emulate_timeout(name, value)

    if tag == 'script':
        domobj.__dict__['script'] = ''
        if config.retrieval_all and 'src' in domobj.__dict__:
            location = window.document.location
            # NOTE(review): the response is not used in this method;
            # presumably the request is issued for hc's own bookkeeping
            # -- confirm before removing.
            hc.get(location.fix_url(domobj.src), location.href)

    # Record where this element's content starts and ends in the raw HTML.
    # The search is case-insensitive, so lowercase the document once and
    # search from self.current instead of re-lowering and slicing per lookup.
    try:
        lowered = self.html.lower()
        tag_pos = lowered.index('<' + tag, self.current)
        # Advance the cursor to just past the '>' closing this start tag.
        self.current = lowered.index('>', tag_pos) + 1
        domobj.__dict__['begin'] = self.current
        domobj.__dict__['end'] = lowered.index('</' + tag, self.current)
        if (tag == 'div' and attrs) or tag == 'body':
            domobj.innerHTML = self.html[domobj.__dict__['begin']:
                                         domobj.__dict__['end']]
    except Exception:
        # Best effort: e.g. index() raises ValueError when the tag text or
        # its closing tag cannot be found.  Offsets set so far are kept.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit
        # propagate.
        pass

    self.DOM_stack[-1].appendChild(domobj)
    self.DOM_stack.append(domobj)

    if tag == 'form':
        window.__dict__['__fl'].append(domobj)

    if tag == 'br' or tag == 'meta':
        # <br> and <meta> have no end tag, so close them immediately.
        self.unknown_endtag(tag)

    if tag == 'select':
        self.lastselect = domobj
    if tag == 'option':
        try:
            self.lastselect.options.append(domobj)
        except Exception:
            # Best effort: no usable <select> to attach this option to.
            pass
def unknown_starttag(self, tag, attrs):
    """Create a DOMObject for an opening tag and push it onto the DOM stack.

    tag   -- tag name as reported by the parser.
    attrs -- list of (name, value) attribute pairs.

    While self.in_Script is set, the raw start-tag text is forwarded to
    handle_data() and nothing else happens.  Once self.endearly is set the
    tag is only logged.  Otherwise a DOMObject is built, given the tag's
    attributes, annotated with the 'begin'/'end' offsets of its content in
    self.html when they can be located, appended to the element on top of
    self.DOM_stack and then pushed onto that stack.  Returns None.
    """
    if self.in_Script:
        # Inside a script the tag is passed on as data rather than parsed.
        self.handle_data(self.get_starttag_text())
        return

    config.VERBOSE(config.VERBOSE_DEBUG,
                   "[DEBUG] [PageParser.py] Tag: " + tag)
    if self.endearly:
        return

    window = self.__dict__['__window']
    domobj = DOMObject(window, tag, self)

    # Sometimes the name used in the tag is not the real attribute name, so
    # a transform is needed.  Note that this is the IE way; in Firefox the
    # transform is done in DOMObject.setAttribute().
    for name, value in attrs:
        domobj.setAttribute(dataetc.attrTrans(name, tag), value)
        if dataetc.isevent(name.lower(), tag):
            self.emulate_timeout(name, value)

    if tag == 'script':
        domobj.__dict__['script'] = ''
        if config.retrieval_all and 'src' in domobj.__dict__:
            location = window.document.location
            # NOTE(review): the response is not used in this method;
            # presumably the request is issued for hc's own bookkeeping
            # -- confirm before removing.
            hc.get(location.fix_url(domobj.src), location.href)

    # Record where this element's content starts and ends in the raw HTML.
    # The search is case-insensitive, so lowercase the document once and
    # search from self.current instead of re-lowering and slicing per lookup.
    try:
        lowered = self.html.lower()
        tag_pos = lowered.index('<' + tag, self.current)
        # Advance the cursor to just past the '>' closing this start tag.
        self.current = lowered.index('>', tag_pos) + 1
        domobj.__dict__['begin'] = self.current
        domobj.__dict__['end'] = lowered.index('</' + tag, self.current)
        if (tag == 'div' and attrs) or tag == 'body':
            domobj.innerHTML = self.html[domobj.__dict__['begin']:
                                         domobj.__dict__['end']]
    except Exception:
        # Best effort: e.g. index() raises ValueError when the tag text or
        # its closing tag cannot be found.  Offsets set so far are kept.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit
        # propagate.
        pass

    self.DOM_stack[-1].appendChild(domobj)
    self.DOM_stack.append(domobj)

    if tag == 'form':
        window.__dict__['__fl'].append(domobj)

    if tag == 'br' or tag == 'meta':
        # <br> and <meta> have no end tag, so close them immediately.
        self.unknown_endtag(tag)

    if tag == 'select':
        self.lastselect = domobj
    if tag == 'option':
        try:
            self.lastselect.options.append(domobj)
        except Exception:
            # Best effort: no usable <select> to attach this option to.
            pass