def process_request(self, request, spider):
    """Render ``request.url`` in a WebKit view and return the resulting page.

    The view obtained from ``self._get_webview()`` is pointed at the request
    URL, three signal handlers are attached, and the GTK main loop is
    entered.  After ``gtk.main()`` returns, the current location and the
    serialized document are read through a JavaScript context and wrapped in
    an ``HtmlResponse`` whose body is UTF-8 encoded.

    ``spider`` is not used.

    NOTE(review): there is no check on ``request.meta`` here, so every
    request that reaches this method is rendered.
    NOTE(review): nothing in this method stops the main loop; presumably one
    of the connected handlers calls ``gtk.main_quit()`` -- confirm.
    """
    view = self._get_webview()

    # The load is requested first and the handlers are attached afterwards;
    # this ordering is kept exactly as it was.
    view.load_uri(request.url)
    view.connect('load-finished', self.load_finished)
    view.connect('document-load-finished', self.document_load_finished)
    view.connect('console-message', self.console_message)

    gtk.main()

    main_frame = view.get_main_frame()
    js_context = jswebkit.JSContext(main_frame.get_global_context())
    final_url = js_context.EvaluateScript('window.location.href')
    markup = js_context.EvaluateScript('document.documentElement.innerHTML')
    payload = markup.encode('utf-8')
    return HtmlResponse(final_url, encoding='utf-8', body=payload)
示例#2
0
def show_result(view, frame):
    """Print the title, URI and rendered HTML of ``frame``.

    ``view`` is accepted but not used.  The HTML is obtained by evaluating
    ``document.documentElement.innerHTML`` in the frame's global JavaScript
    context.
    """
    print(frame.get_title())
    print(frame.get_uri())
    js_context = jswebkit.JSContext(frame.get_global_context())
    markup = js_context.EvaluateScript('document.documentElement.innerHTML')
    print(str(markup))
示例#3
0
 def _load_finished(self, deferred, view, frame):
     """Fire ``deferred`` with the rendered page once the main frame loads.

     Calls for any frame other than ``view.get_main_frame()`` are ignored.
     For the main frame, the current location and the serialized document
     are read through a JavaScript context, wrapped in an ``HtmlResponse``
     with a UTF-8 encoded body, and passed to ``deferred.callback``.
     """
     main_frame = view.get_main_frame()
     if frame != main_frame:
         return

     js_context = jswebkit.JSContext(frame.get_global_context())
     final_url = js_context.EvaluateScript('window.location.href')
     markup = js_context.EvaluateScript('document.documentElement.innerHTML')
     payload = markup.encode('utf-8')
     deferred.callback(
         HtmlResponse(final_url, encoding='utf-8', body=payload))
示例#4
0
def get(url):
    """Load ``url`` in a WebKit view, print the rendered body and save it.

    A fresh ``webkit.WebView`` loads ``url``; the GTK main loop runs until
    the view emits ``load-finished``.  The value of
    ``document.body.innerHTML`` is then printed and written to a file named
    ``temp`` in the current working directory.

    Args:
        url: address handed to ``WebView.load_uri``.

    Returns:
        None.
    """
    webview = webkit.WebView()
    # Leave the main loop as soon as the page reports it has finished loading.
    webview.connect('load-finished', lambda v, f: gtk.main_quit())
    webview.load_uri(url)
    gtk.main()

    js = jswebkit.JSContext(webview.get_main_frame().get_global_context())
    rendered = js.EvaluateScript('document.body.innerHTML')
    # str() on a unicode value is an implicit ASCII encode under Python 2 and
    # raises UnicodeEncodeError for any non-ASCII markup, so encode explicitly.
    # Assumes EvaluateScript yields a text string for this expression; a value
    # that is already a byte string is passed through untouched.
    if not isinstance(rendered, bytes):
        rendered = rendered.encode('utf-8')

    print(rendered)
    with open("temp", "wb") as f:
        f.write(rendered)
示例#5
0
def load_finished(view, frame):
    """Print the page location and its submit inputs after the main frame loads.

    Calls for frames other than ``view.get_main_frame()`` return immediately.
    Otherwise the current location is printed, ``document.body.innerHTML`` is
    parsed with ``lxml.html.fromstring``, and the result of the XPath query
    ``//input[@type="submit"]`` is printed.
    """
    if frame != view.get_main_frame():
        return

    js_context = jswebkit.JSContext(frame.get_global_context())
    location = js_context.EvaluateScript('window.location.href')
    print(location)

    body_markup = js_context.EvaluateScript('document.body.innerHTML')
    document_tree = lxml.html.fromstring(body_markup)
    submit_inputs = document_tree.xpath('//input[@type="submit"]')
    print(submit_inputs)
示例#6
0
 def load_finished(self, *args, **kw):
     """Store the rendered body HTML and signal ``self.pending``.

     Reads ``document.body.innerHTML`` from the main frame of
     ``self.webview`` and stores it, UTF-8 encoded, in
     ``self.rendered_html``.  Positional and keyword arguments are accepted
     and ignored.

     Any exception raised while reading the page is printed rather than
     propagated, in which case ``self.rendered_html`` is left as it was.
     ``self.pending.set()`` is always called, so whatever waits on
     ``self.pending`` is released even when extraction fails.
     """
     try:
         print('Render.load_finished')
         js = jswebkit.JSContext(
             self.webview.get_main_frame().get_global_context())
         html = js.EvaluateScript('document.body.innerHTML')
         # str() would implicitly encode as ASCII under Python 2 and raise
         # on non-ASCII markup.  Assumes EvaluateScript yields a text string
         # here; an existing byte string is stored unchanged.
         if not isinstance(html, bytes):
             html = html.encode('utf-8')
         self.rendered_html = html
     except Exception as e:
         print(e)
     finally:
         # Previously skipped on any error, which left the event unset forever.
         self.pending.set()
 def process_request(self, request, spider):
     """Render a non-form request in WebKit and return the rendered page.

     Args:
         request: request whose ``url`` is loaded in a new ``webkit.WebView``.
         spider: not used.

     Returns:
         ``None`` when ``request`` is exactly a ``FormRequest``.  Otherwise an
         ``HtmlResponse`` for ``request.url`` whose body is the UTF-8 encoded
         value of ``document.documentElement.innerHTML``.
     """
     # Exact type comparison is kept: subclasses of FormRequest are still
     # rendered, exactly as before.
     if type(request) is FormRequest:
         return None

     webview = webkit.WebView()
     # Leave the main loop as soon as the page reports it has finished loading.
     webview.connect('load-finished', lambda v, f: gtk.main_quit())
     webview.load_uri(request.url)
     gtk.main()

     js = jswebkit.JSContext(
         webview.get_main_frame().get_global_context())
     rendered = js.EvaluateScript('document.documentElement.innerHTML')
     # str() on a unicode value is an implicit ASCII encode under Python 2 and
     # raises UnicodeEncodeError for non-ASCII pages.  Assumes EvaluateScript
     # yields a text string here; a byte string is passed through untouched.
     if not isinstance(rendered, bytes):
         rendered = rendered.encode('utf-8')
     # The encoding is declared so the response is decoded with the codec
     # that was just used to produce the body.
     return HtmlResponse(request.url, encoding='utf-8', body=rendered)
示例#8
0
 def __init__(self, widget, uri):
     """Set up the view, start loading ``uri`` and create a JS context.

     ``widget`` is stored as ``self._widget_window``.  The
     ``enable-developer-extras`` setting is switched on, the view is made
     transparent, and ``self._script_callback`` is connected to the
     ``script-prompt`` signal.  ``uri`` is opened with ``self.open``, after
     which a ``jswebkit.JSContext`` for the main frame's global context is
     kept in ``self._ctx``.
     """
     webkit.WebView.__init__(self)
     self._widget_window = widget

     self.get_settings().set_property("enable-developer-extras", True)
     self.set_transparent(True)
     self.connect("script-prompt", self._script_callback)

     # The page is requested before the JavaScript context is created;
     # this ordering is kept exactly as it was.
     self.open(uri)
     main_frame = self.get_main_frame()
     self._ctx = jswebkit.JSContext(main_frame.get_global_context())
 def load_finished(self, view, frame):
     """Build the rendered response, stop GTK and fire ``self.d``.

     The ``frame`` argument is ignored: the page is always read from
     ``view.get_main_frame()``, and there is no filter on which frame
     triggered the call.  The current location and serialized document are
     wrapped in an ``HtmlResponse`` with a UTF-8 encoded body.  Then
     ``"finished"`` is printed, ``self.stop_gtk()`` is called and the
     response is passed to ``self.d.callback``.
     """
     main_frame = view.get_main_frame()
     js_context = jswebkit.JSContext(main_frame.get_global_context())
     final_url = js_context.EvaluateScript('window.location.href')
     markup = js_context.EvaluateScript('document.documentElement.innerHTML')
     page = HtmlResponse(
         final_url, encoding='utf-8', body=markup.encode('utf-8'))

     print("finished")
     self.stop_gtk()
     self.d.callback(page)
示例#10
0
 def process_request(self, request, spider):
     """Render requests flagged with ``'renderjs'`` in ``request.meta``.

     Requests without that key yield ``None``.  For flagged requests the view
     from ``self._get_webview()`` loads ``request.url`` with
     ``self.stop_gtk`` connected to ``load-finished``, and ``gtk.main()`` is
     entered.  After it returns, the current location and serialized document
     are returned as an ``HtmlResponse`` with a UTF-8 encoded body.

     ``spider`` is not used.
     """
     if 'renderjs' not in request.meta:
         return None

     view = self._get_webview()
     view.connect('load-finished', self.stop_gtk)
     view.load_uri(request.url)
     gtk.main()

     main_frame = view.get_main_frame()
     js_context = jswebkit.JSContext(main_frame.get_global_context())
     final_url = js_context.EvaluateScript('window.location.href')
     markup = js_context.EvaluateScript('document.documentElement.innerHTML')
     payload = markup.encode('utf-8')
     return HtmlResponse(final_url, encoding='utf-8', body=payload)
示例#11
0
 def process_request(self, request, spider):
     """Render a request in WebKit unless it opts out or is a form request.

     Args:
         request: request whose ``url`` is loaded in a new ``webkit.WebView``.
             A ``'no_webkit'`` key in ``request.meta`` disables rendering.
         spider: not used.

     Returns:
         ``None`` when ``'no_webkit'`` is in ``request.meta`` or ``request``
         is exactly a ``FormRequest``.  Otherwise an ``HtmlResponse`` for
         ``request.url`` whose body is the UTF-8 encoded value of
         ``document.documentElement.innerHTML``.
     """
     # ``in`` replaces the deprecated dict.has_key().  The exact type
     # comparison is kept so FormRequest subclasses are still rendered.
     if 'no_webkit' in request.meta or type(request) is FormRequest:
         return None

     webview = webkit.WebView()
     # Leave the main loop as soon as the page reports it has finished loading.
     webview.connect('load-finished', lambda v, f: gtk.main_quit())
     webview.load_uri(request.url)
     gtk.main()

     js = jswebkit.JSContext(webview.get_main_frame().get_global_context())
     rendered = js.EvaluateScript('document.documentElement.innerHTML')
     # str() on a unicode value is an implicit ASCII encode under Python 2 and
     # raises UnicodeEncodeError for non-ASCII pages.  Assumes EvaluateScript
     # yields a text string here; a byte string is passed through untouched.
     if not isinstance(rendered, bytes):
         rendered = rendered.encode('utf-8')
     # The encoding is declared so the response is decoded with the codec
     # that was just used to produce the body.
     return HtmlResponse(request.url, encoding='utf-8', body=rendered)
示例#12
0
 def load_finished_cb(self, view, frame):
     """Print the ``href`` of every ``<a>`` element in the loaded document.

     ``view`` and ``frame`` are not used; the JavaScript context is taken
     from ``self.get_main_frame()``.  The property names of the returned
     element collection are printed before the individual links.
     """
     print("load_finished")
     main_frame = self.get_main_frame()
     js_context = jswebkit.JSContext(main_frame.get_global_context())

     # The window object is still evaluated, as before, but not used further.
     _window = js_context.EvaluateScript("window")
     document = js_context.EvaluateScript("document")

     # The document is passed explicitly as the first argument -- presumably
     # the receiver that jswebkit expects for a method call; confirm.
     anchors = document.getElementsByTagName(document, "a")
     print(anchors.getPropertyNames())
     for anchor in anchors:
         print(anchor.href)
示例#13
0
    def _doc_load_finished(self, view, frame):
        """Collect link targets from ``frame`` and schedule ``_webview_done``.

        The ``href`` of every ``<a>`` element in the frame's document is
        appended to ``self.links``.  A ``threading.Timer`` is then started
        that calls ``self._webview_done`` after 2 seconds.  ``view`` is not
        used.
        """
        js_context = jswebkit.JSContext(frame.get_global_context())
        document = js_context.EvaluateScript("document")

        # The document is passed explicitly as the first argument --
        # presumably the receiver that jswebkit expects; confirm.
        anchors = document.getElementsByTagName(document, "a")
        for anchor in anchors:
            self.links.append(anchor.href)

        done_timer = threading.Timer(2, self._webview_done)
        done_timer.start()
示例#14
0
    def process_request(self, request, spider):
        """Render requests of selected spiders in WebKit.

        Args:
            request: request whose ``url`` is loaded in a new
                ``webkit.WebView``.
            spider: its ``name`` must be in ``settings.WEBKIT_DOWNLOADER``
                for rendering to happen.

        Returns:
            ``None`` when the spider is not listed or ``request`` is exactly
            a ``FormRequest``.  Otherwise an ``HtmlResponse`` for
            ``request.url`` whose body is the UTF-8 encoded value of
            ``document.body.innerHTML``.
        """
        # NOTE(review): the numeric markers and the other prints are debug
        # trace output; they are kept so stdout stays the same.
        print('1111111')
        print(spider.name)
        if spider.name not in settings.WEBKIT_DOWNLOADER:
            return None

        print('2222')
        # Exact type comparison is kept: FormRequest subclasses are rendered.
        if type(request) is FormRequest:
            return None

        print('333333')
        webview = webkit.WebView()
        print(request.url)
        # Leave the main loop as soon as the page reports it has loaded.
        webview.connect('load-finished', lambda v, f: gtk.main_quit())
        webview.load_uri(request.url)
        gtk.main()

        js = jswebkit.JSContext(
            webview.get_main_frame().get_global_context())
        rendered = js.EvaluateScript('document.body.innerHTML')
        # str() on a unicode value is an implicit ASCII encode under Python 2
        # and raises UnicodeEncodeError for non-ASCII pages.  Assumes
        # EvaluateScript yields a text string here; a byte string is passed
        # through untouched.
        if not isinstance(rendered, bytes):
            rendered = rendered.encode('utf-8')
        print(rendered)
        # The encoding is declared so the response is decoded with the codec
        # that was just used to produce the body.
        return HtmlResponse(request.url, encoding='utf-8', body=rendered)
示例#15
0
 def get_html(self):
     """Return ``document.body.innerHTML`` of ``self.webview``'s main frame.

     The value is whatever ``EvaluateScript`` yields; it is returned without
     conversion.
     """
     global_context = self.webview.get_main_frame().get_global_context()
     js_context = jswebkit.JSContext(global_context)
     return js_context.EvaluateScript("document.body.innerHTML")