def get_jsbrowser(self, *args, **kwargs):
    '''
    Build and return the Browser (from calibre.web.jsbrowser.browser)
    used by this object. Override this method in your recipe if you need
    a non-standard Browser object.

    Only the ``default_timeout`` keyword argument is consulted; when it is
    absent ``self.timeout`` is used instead. Positional arguments and any
    other keyword arguments are accepted but ignored.
    '''
    # Imported lazily, inside the method, exactly as the original did
    from calibre.web.jsbrowser.browser import Browser
    chosen_timeout = kwargs.get('default_timeout', self.timeout)
    return Browser(default_timeout=chosen_timeout)
def setUpClass(cls):
    '''
    Class-level fixture: configure CherryPy, mount a Server instance at
    '/', start ``cls.run_server`` in a daemon thread and create the
    Browser stored on ``cls.browser``.
    '''
    cls.port = 17983
    cls.server = Server()

    server_settings = {
        'log.screen'             : False,
        'checker.on'             : False,
        'engine.autoreload_on'   : False,
        'request.show_tracebacks': True,
        'server.socket_host'     : b'127.0.0.1',
        'server.socket_port'     : cls.port,
        # seconds
        'server.socket_timeout'  : 10,
        # number of threads; setting to 1 causes major slowdown
        'server.thread_pool'     : 5,
        # "minutes" according to the original comment -- TODO confirm the
        # unit against the CherryPy documentation
        'server.shutdown_timeout': 0.1,
    }
    cherrypy.config.update(server_settings)
    cherrypy.tree.mount(cls.server, '/', config={'/': {}})

    # Daemon thread, so a still-running server cannot keep the process alive
    worker = threading.Thread(target=cls.run_server)
    cls.server_thread = worker
    worker.daemon = True
    worker.start()

    cls.browser = Browser(verbosity=0)
def do_login(login_url, calibre_browser, form_selector, controls={},
             num_of_replies=0, timeout=60.0, verbosity=0, pause_time=5,
             post_visit_callback=None, post_submit_callback=None,
             submit_control_selector=None):
    '''
    Load ``login_url`` in a temporary Browser, fill in and submit the form
    matched by ``form_selector``, then copy the resulting cookies into
    ``calibre_browser.cookiejar``.

    :param login_url: URL of the page containing the login form.
    :param calibre_browser: Object providing ``addheaders`` (iterable of
        ``(name, value)`` pairs) and ``cookiejar`` (with ``set_cookie``).
        Its User-Agent header, if present, is reused for the temporary
        Browser; otherwise ``USER_AGENT`` is used.
    :param form_selector: Passed to ``br.select_form`` to pick the form.
    :param controls: Mapping of form control name -> value. It is only
        read, never mutated, so the shared default dict is harmless.
    :param num_of_replies: If greater than zero the form is submitted with
        ``br.ajax_submit(num_of_replies=...)``, otherwise with
        ``br.submit``.
    :param timeout: Timeout passed to the visit and submit calls.
    :param verbosity: Passed through to the Browser constructor.
    :param pause_time: Passed to ``br.run_for_a_time`` after submitting,
        to give any javascript some time to run.
    :param post_visit_callback: Optional callable invoked with the Browser
        after the login page has loaded.
    :param post_submit_callback: Optional callable invoked with the
        Browser after the form was submitted and the pause elapsed.
    :param submit_control_selector: Passed through to the submit call.
    :return: The value of ``br.html`` after the login sequence.
    :raises ValueError: If ``br.visit`` reports that the login URL failed
        to load.
    '''
    # Prefer the User-Agent already configured on the calibre browser
    ua = USER_AGENT
    for key, val in calibre_browser.addheaders:
        if key.lower() == 'user-agent':
            ua = val
            break

    br = Browser(user_agent=ua, verbosity=verbosity)
    # try/finally guarantees the temporary Browser is closed even when the
    # page fails to load or a callback/submit raises
    try:
        if not br.visit(login_url, timeout=timeout):
            raise ValueError('Failed to load the login URL: %r'%login_url)

        if callable(post_visit_callback):
            post_visit_callback(br)

        f = br.select_form(form_selector)
        # items() works for dicts on both Python 2 and Python 3
        for key, val in controls.items():
            f[key] = val

        # br.show_browser()

        if num_of_replies > 0:
            br.ajax_submit(num_of_replies=num_of_replies, timeout=timeout,
                           submit_control_selector=submit_control_selector)
        else:
            br.submit(timeout=timeout,
                      submit_control_selector=submit_control_selector)

        # Give any javascript some time to run
        br.run_for_a_time(pause_time)

        if callable(post_submit_callback):
            post_submit_callback(br)

        # NOTE(review): the same call is commented out before the submit
        # step; this one looks like a leftover debugging aid -- confirm
        # whether it should run unconditionally. Kept to preserve behavior.
        br.show_browser()

        cj = calibre_browser.cookiejar
        for cookie in br.cookies:
            cj.set_cookie(cookie)
        html = br.html
    finally:
        br.close()
    return html
def search(title, author, size, timeout, debug=False):
    '''
    Submit Google's advanced image search form for ``title`` and
    ``author`` and return the list of results collected by
    ``find_image_urls``.

    :param title: First half of the query string (``as_q`` field).
    :param author: Second half of the query string (``as_q`` field).
    :param size: Value for the ``imgsz`` form field; the field is left
        untouched when this is ``'any'``.
    :param timeout: Maximum number of seconds, measured from the start of
        this call, to keep polling while the results page is loading.
    :param debug: When true, developer tools are enabled on the Browser
        and ``br.show_browser()`` is called once the results have been
        gathered.
    :return: The list filled in by ``find_image_urls``.
    :raises Timeout: If the page is still loading, fewer than five entries
        have been found and ``timeout`` seconds have elapsed.
    '''
    import time
    from calibre.web.jsbrowser.browser import Browser, LoadWatcher, Timeout
    ans = []
    start_time = time.time()
    br = Browser(user_agent=USER_AGENT, enable_developer_tools=debug)
    # The cleanup in ``finally`` must also run when Timeout (or anything
    # else) is raised, otherwise the Browser is leaked
    try:
        br.visit('https://www.google.com/advanced_image_search')
        f = br.select_form('form[action="/search"]')
        f['as_q'] = '%s %s'%(title, author)
        if size != 'any':
            f['imgsz'] = size
        f['imgar'] = 't|xt'
        f['as_filetype'] = 'jpg'
        br.submit(wait_for_load=False)

        # Loop until the page finishes loading or at least five image urls
        # are found
        lw = LoadWatcher(br.page, br)
        while lw.is_loading and len(ans) < 5:
            br.run_for_a_time(0.2)
            find_image_urls(br, ans)
            if time.time() - start_time > timeout:
                raise Timeout('Timed out trying to load google image search page')
        find_image_urls(br, ans)
        if debug:
            br.show_browser()
    finally:
        br.close()
        del br  # Needed to prevent PyQt from segfaulting
    return ans
def search(title, author, size, timeout, debug=False):
    '''
    Run a Google advanced image search for ``title`` and ``author`` and
    return whatever ``find_image_urls`` collected from the results page.

    :param title: Combined with ``author`` into the ``as_q`` form field.
    :param author: Combined with ``title`` into the ``as_q`` form field.
    :param size: Assigned to the ``imgsz`` form field unless it equals
        ``'any'``, in which case that field is not set.
    :param timeout: Seconds allowed, counted from entry to this function,
        before polling of the loading page is abandoned.
    :param debug: Enables developer tools on the Browser and calls
        ``br.show_browser()`` after the results are gathered.
    :return: The list populated by ``find_image_urls``.
    :raises Timeout: When ``timeout`` seconds have passed while the page
        is still loading and fewer than five entries have been found.
    '''
    import time
    from calibre.web.jsbrowser.browser import Browser, LoadWatcher, Timeout
    ans = []
    start_time = time.time()
    br = Browser(user_agent=USER_AGENT, enable_developer_tools=debug)
    try:
        br.visit('https://www.google.com/advanced_image_search')
        f = br.select_form('form[action="/search"]')
        f['as_q'] = '%s %s' % (title, author)
        if size != 'any':
            f['imgsz'] = size
        f['imgar'] = 't|xt'
        f['as_filetype'] = 'jpg'
        br.submit(wait_for_load=False)

        # Loop until the page finishes loading or at least five image urls
        # are found
        lw = LoadWatcher(br.page, br)
        while lw.is_loading and len(ans) < 5:
            br.run_for_a_time(0.2)
            find_image_urls(br, ans)
            if time.time() - start_time > timeout:
                raise Timeout('Timed out trying to load google image search page')
        find_image_urls(br, ans)
        if debug:
            br.show_browser()
    finally:
        # Always release the Browser, including on the Timeout path, which
        # previously skipped this cleanup entirely
        br.close()
        del br  # Needed to prevent PyQt from segfaulting
    return ans