class Ghost(object):
    """Ghost manages a QWebPage.

    :param user_agent: The default User-Agent header.
    :param wait_timeout: Maximum step duration in second.
    :param wait_callback: An optional callable that is periodically
        executed until Ghost stops waiting.
    :param log_level: The optional logging level.
    :param display: A boolean that tells ghost to displays UI.
    :param viewport_size: A tuple that sets initial viewport size.
    """
    # Class-level state shared by every Ghost instance; the nested
    # ``confirm`` / ``prompt`` context managers and ``wait_for_alert``
    # read and write these on the class, not on the instance.
    _alert = None
    _confirm_expected = None
    _prompt_expected = None
    _upload_file = None
    _app = None

    def __init__(self, user_agent=default_user_agent, wait_timeout=8,
                 wait_callback=None, log_level=logging.WARNING, display=False,
                 viewport_size=(800, 600)):
        self.http_resources = []
        self.user_agent = user_agent
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.loaded = True

        if not sys.platform.startswith('win') \
                and 'DISPLAY' not in os.environ \
                and not hasattr(Ghost, 'xvfb'):
            try:
                Ghost.xvfb = subprocess.Popen(['Xvfb', ':99'])
            except OSError:
                raise Exception('Xvfb is required to a ghost run oustside ' +
                                'an X instance')
            # Only advertise the virtual display once Xvfb really started;
            # otherwise a failed start would leave a bogus DISPLAY behind.
            os.environ['DISPLAY'] = ':99'

        self.display = display

        if not Ghost._app:
            Ghost._app = QApplication.instance() or QApplication(['ghost'])

        self.page = GhostWebPage(Ghost._app)
        QtWebKit.QWebSettings.setMaximumPagesInCache(0)
        QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)

        self.set_viewport_size(*viewport_size)

        self.page.loadFinished.connect(self._page_loaded)
        self.page.loadStarted.connect(self._page_load_started)

        self.manager = self.page.networkAccessManager()
        self.manager.finished.connect(self._request_ended)

        self.cookie_jar = QNetworkCookieJar()
        self.manager.setCookieJar(self.cookie_jar)

        self.main_frame = self.page.mainFrame()

        logger.setLevel(log_level)

        if self.display:
            self.webview = QtWebKit.QWebView()
            self.webview.setPage(self.page)
            self.webview.show()

    def __del__(self):
        self.exit()

    def capture(self, region=None, selector=None,
                format=QImage.Format_ARGB32):
        """Returns snapshot as QImage.

        :param region: An optional tuple containing region as pixel
            coordinates (x1, y1, x2, y2).
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
        """
        if region is None and selector is not None:
            region = self.region_for_selector(selector)
        if region:
            x1, y1, x2, y2 = region
            w, h = (x2 - x1), (y2 - y1)
            # Render from the origin up to the far corner, then crop.
            image = QImage(QSize(x2, y2), format)
            painter = QPainter(image)
            self.main_frame.render(painter)
            painter.end()
            image = image.copy(x1, y1, w, h)
        else:
            image = QImage(self.page.viewportSize(), format)
            painter = QPainter(image)
            self.main_frame.render(painter)
            painter.end()
        return image

    def capture_to(self, path, region=None, selector=None,
                   format=QImage.Format_ARGB32):
        """Saves snapshot as image.

        :param path: The destination path.
        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
        """
        self.capture(region=region, format=format,
                     selector=selector).save(path)

    @client_utils_required
    @can_load_page
    def click(self, selector):
        """Click the targeted element.

        :param selector: A CSS3 selector to targeted element.
        :raises Exception: if no element matches the selector.
        """
        if not self.exists(selector):
            raise Exception("Can't find element to click")
        return self.evaluate('GhostUtils.click("%s");' % selector)

    class confirm:
        """Statement that tells Ghost how to deal with javascript confirm().

        :param confirm: A boolean that confirm.
        :param callable: A callable that returns a boolean for confirmation.
        """
        def __init__(self, confirm=True, callback=None):
            self.confirm = confirm
            self.callback = callback

        def __enter__(self):
            Ghost._confirm_expected = (self.confirm, self.callback)

        def __exit__(self, type, value, traceback):
            Ghost._confirm_expected = None

    @property
    def content(self):
        """Returns current frame HTML as a string."""
        return unicode(self.main_frame.toHtml())

    @property
    def cookies(self):
        """Returns all cookies."""
        return self.cookie_jar.allCookies()

    def delete_cookies(self):
        """Deletes all cookies."""
        self.cookie_jar.setAllCookies([])

    @can_load_page
    def evaluate(self, script):
        """Evaluates script in page frame.

        :param script: The script to evaluate.
        :return: A tuple (script result, resources loaded so far).
        """
        return (self.main_frame.evaluateJavaScript("%s" % script),
                self._release_last_resources())

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.
        Raises native IOException in case of invalid file.

        :param path: The path of the file.
        :param encoding: The file's encoding.
        """
        # Read inside a ``with`` block so the handle is always released.
        with codecs.open(path, encoding=encoding) as script_file:
            script = script_file.read()
        self.evaluate(script)

    def exists(self, selector):
        """Checks if element exists for given selector.

        :param string: The element selector.
        """
        return not self.main_frame.findFirstElement(selector).isNull()

    def exit(self):
        """Exits application and relateds.

        Safe to call more than once and on a partially constructed
        instance (it is invoked from ``__del__``).
        """
        webview = getattr(self, 'webview', None)
        if getattr(self, 'display', False) and webview is not None:
            webview.close()
        if Ghost._app is not None:
            Ghost._app.exit()
        # Drop the Qt references; tolerate attributes that were never set
        # or were already removed by a previous exit().
        for name in ('manager', 'page', 'main_frame'):
            self.__dict__.pop(name, None)
        if hasattr(self, 'xvfb'):
            self.xvfb.terminate()

    @can_load_page
    def fill(self, selector, values):
        """Fills a form with provided values.

        :param selector: A CSS selector to the target form to fill.
        :param values: A dict containing the values.
        :return: A tuple (True, resources loaded while filling).
        :raises Exception: if the form cannot be found.
        """
        if not self.exists(selector):
            raise Exception("Can't find form")
        resources = []
        for field in values:
            field_selector = "%s [name=%s]" % (selector, field)
            field_resources = self.set_field_value(field_selector,
                                                   values[field])[1]
            resources.extend(field_resources)
        return True, resources

    @client_utils_required
    @can_load_page
    def fire_on(self, selector, method):
        """Call method on element matching given selector.

        :param selector: A CSS selector to the target element.
        :param method: The name of the method to fire.
        :param expect_loading: Specifies if a page loading is expected.
        """
        return self.evaluate('GhostUtils.fireOn("%s", "%s");' % (
            selector, method))

    def global_exists(self, global_name):
        """Checks if javascript global exists.

        :param global_name: The name of the global.
        """
        return self.evaluate('!(typeof %s === "undefined");'
                             % global_name)[0]

    def hide(self):
        """Close the webview.

        :raises Exception: if there is no webview to close.
        """
        try:
            self.webview.close()
        except Exception:
            raise Exception("no webview to close")

    def open(self, address, method='get', headers=None, auth=None):
        """Opens a web page.

        :param address: The resource URL.
        :param method: The Http method.
        :param headers: An optional dict of extra request headers. The
            dict is copied, never modified.
        :param auth: An optional tuple of HTTP auth (Basic, username,
            password).
        :return: Page resource, All loaded resources.
        :raises Exception: on an unknown http method.
        """
        body = QByteArray()
        try:
            method = getattr(QNetworkAccessManager,
                             "%sOperation" % method.capitalize())
        except AttributeError:
            raise Exception("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        if auth is not None:
            auth_type, username, password = auth

            def authenticate(reply, authenticator):
                authenticator.setUser(username)
                authenticator.setPassword(password)
            self.page.networkAccessManager().authenticationRequired \
                .connect(authenticate)
        # Work on a private copy: the previous shared ``{}`` default kept
        # the first User-Agent ever injected for every later call.
        headers = dict(headers or {})
        if "User-Agent" not in headers:
            headers["User-Agent"] = self.user_agent
        for header in headers:
            request.setRawHeader(header, headers[header])
        self.main_frame.load(request, method, body)
        self.loaded = False
        return self.wait_for_page_loaded()

    class prompt:
        """Statement that tells Ghost how to deal with javascript prompt().

        :param value: A string value to fill in prompt.
        :param callback: A callable that returns the value to fill in.
        """
        def __init__(self, value='', callback=None):
            self.value = value
            self.callback = callback

        def __enter__(self):
            Ghost._prompt_expected = (self.value, self.callback)

        def __exit__(self, type, value, traceback):
            Ghost._prompt_expected = None

    @client_utils_required
    def region_for_selector(self, selector):
        """Returns frame region for given selector as tuple.

        :param selector: The targeted element.
        :return: (left, top, right, bottom) of the element geometry.
        :raises Exception: if the region cannot be computed.
        """
        geo = self.main_frame.findFirstElement(selector).geometry()
        try:
            region = (geo.left(), geo.top(), geo.right(), geo.bottom())
        except Exception:
            raise Exception("can't get region for selector '%s'" % selector)
        return region

    @can_load_page
    @client_utils_required
    def set_field_value(self, selector, value, blur=True):
        """Sets the value of the field matched by given selector.

        :param selector: A CSS selector that target the field.
        :param value: The value to fill in.
        :param blur: An optional boolean that force blur when filled in.
        :return: A tuple (script result, loaded resources).
        :raises Exception: if the element is missing or its tag is not
            supported.
        """
        def _set_text_value(selector, value):
            return self.evaluate(
                'document.querySelector("%s").value=%s;' %
                (selector, json.dumps(value)))

        res, resources = None, []
        element = self.main_frame.findFirstElement(selector)
        if element.isNull():
            raise Exception('can\'t find element for %s"' % selector)
        self.fire_on(selector, 'focus')
        tag_name = element.tagName()
        if tag_name in ["TEXTAREA", "SELECT"]:
            res, resources = _set_text_value(selector, value)
        elif tag_name == "INPUT":
            input_type = element.attribute('type')
            if input_type in ["color", "date", "datetime",
                              "datetime-local", "email", "hidden", "month",
                              "number", "password", "range", "search",
                              "tel", "text", "time", "url", "week"]:
                res, resources = _set_text_value(selector, value)
            elif input_type == "checkbox":
                res, resources = self.evaluate(
                    'GhostUtils.setCheckboxValue("%s", %s);' %
                    (selector, json.dumps(value)))
            elif input_type == "radio":
                res, resources = self.evaluate(
                    'GhostUtils.setRadioValue("%s", %s);' %
                    (selector, json.dumps(value)))
            elif input_type == "file":
                Ghost._upload_file = value
                try:
                    res, resources = self.click(selector)
                finally:
                    # Never leave a pending upload behind if click fails.
                    Ghost._upload_file = None
        else:
            raise Exception('unsuported field tag')
        if blur:
            self.fire_on(selector, 'blur')
        return res, resources

    def set_viewport_size(self, width, height):
        """Sets the page viewport size.

        :param width: An integer that sets width pixel count.
        :param height: An integer that sets height pixel count.
        """
        self.page.setViewportSize(QSize(width, height))

    def show(self):
        """Show current page inside a QWebView.
        """
        self.webview = QtWebKit.QWebView()
        self.webview.setPage(self.page)
        self.webview.show()

    def wait_for_alert(self):
        """Waits for main frame alert().

        :return: A tuple (alert message, loaded resources).
        """
        self._wait_for(lambda: Ghost._alert is not None,
                       'User has not been alerted.')
        msg = Ghost._alert
        Ghost._alert = None
        return msg, self._release_last_resources()

    def wait_for_page_loaded(self):
        """Waits until page is loaded, assumed that a page as been requested.

        :return: A tuple (page resource or None, all loaded resources).
        """
        self._wait_for(lambda: self.loaded, 'Unable to load requested page')
        resources = self._release_last_resources()
        page = None
        current_url = self.main_frame.url().toString()
        for resource in resources:
            # No early exit: the last matching resource wins.
            if current_url == resource.url:
                page = resource
        return page, resources

    def wait_for_selector(self, selector):
        """Waits until selector match an element on the frame.

        :param selector: The selector to wait for.
        """
        self._wait_for(lambda: self.exists(selector),
                       'Can\'t find element matching "%s"' % selector)
        return True, self._release_last_resources()

    def wait_for_text(self, text):
        """Waits until given text appear on main frame.

        :param text: The text to wait for.
        """
        self._wait_for(lambda: text in self.content,
                       'Can\'t find "%s" in current frame' % text)
        return True, self._release_last_resources()

    def _release_last_resources(self):
        """Releases last loaded resources.

        :return: The released resources.
        """
        last_resources = self.http_resources
        self.http_resources = []
        return last_resources

    def _page_loaded(self):
        """Called back when page is loaded.
        """
        self.loaded = True

    def _page_load_started(self):
        """Called back when page load started.
        """
        self.loaded = False

    def _request_ended(self, res):
        """Adds an HttpResource object to http_resources.

        :param res: The request result.
        """
        # Replies without an HTTP status code are not recorded.
        if res.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            self.http_resources.append(HttpResource(res))

    def _wait_for(self, condition, timeout_message):
        """Waits until condition is True.

        :param condition: A callable that returns the condition.
        :param timeout_message: The exception message on timeout.
        :raises Exception: when ``wait_timeout`` seconds have elapsed.
        """
        started_at = time.time()
        while not condition():
            if time.time() > (started_at + self.wait_timeout):
                raise Exception(timeout_message)
            time.sleep(0.01)
            # Let Qt deliver pending signals so the condition can change.
            Ghost._app.processEvents()
            if self.wait_callback is not None:
                self.wait_callback()
class Kit(object):
    """Loads URLs through a WebKit view and wraps the reply in a Response.

    A single QApplication is created lazily and shared by every instance
    through ``Kit._app``.

    :param gui: When true, the underlying web view is shown.
    """
    _app = None

    def __init__(self, gui=False):
        if not Kit._app:
            Kit._app = QApplication([])

        # Created up-front so network_reply_handler never hits a missing
        # attribute if a reply is delivered before request() is called.
        self.resource_list = []

        manager = KitNetworkAccessManager()
        manager.finished.connect(self.network_reply_handler)

        self.cookie_jar = QNetworkCookieJar()
        manager.setCookieJar(self.cookie_jar)

        self.page = KitPage()
        self.page.setNetworkAccessManager(manager)

        self.view = KitWebView()
        self.view.setPage(self.page)
        self.view.setApplication(Kit._app)

        if gui:
            self.view.show()

    def get_cookies(self):
        """Return the cookie jar content as a ``{name: value}`` dict."""
        return dict(
            (cookie.name().data(), cookie.value().data())
            for cookie in self.cookie_jar.allCookies()
        )

    def request(self, url, user_agent='Mozilla', cookies=None, timeout=15,
                method='get', data=None, headers=None):
        """Load ``url`` and return the Response built from the main reply.

        :param url: The address to load.
        :param user_agent: Value stored on the page as ``user_agent``.
        :param cookies: Optional dict of cookies; it replaces the jar
            content before the request is made.
        :param timeout: Maximum loading time, in seconds.
        :param method: Http method name (``get``, ``post``, ...).
        :param data: Optional request body.
        :param headers: Optional dict of raw request headers. The dict is
            copied, never modified.
        :raises KitError: on timeout, or when no received resource matches
            the final URL of the main frame.
        """
        if cookies is None:
            cookies = {}
        # Copy so that forcing Content-Type below never leaks into the
        # caller's dict.
        headers = dict(headers) if headers is not None else {}

        url_info = urlsplit(url)

        self.resource_list = []
        loop = QEventLoop()
        self.view.loadFinished.connect(loop.quit)

        # Timeout: the single-shot timer quits the loop when it fires, so
        # a still-active timer afterwards means the page finished first.
        timer = QTimer()
        timer.setSingleShot(True)
        timer.timeout.connect(loop.quit)
        timer.start(int(timeout * 1000))

        # User-Agent
        self.page.user_agent = user_agent

        # Cookies: all of them are bound to the dotted host of the URL
        # (port stripped).
        domain = ('.' + url_info.netloc).split(':')[0]
        cookie_obj_list = []
        for name, value in cookies.items():
            cookie_obj = QNetworkCookie(name, value)
            cookie_obj.setDomain(domain)
            cookie_obj_list.append(cookie_obj)
        self.cookie_jar.setAllCookies(cookie_obj_list)

        # Method
        method_obj = getattr(QNetworkAccessManager,
                             '%sOperation' % method.capitalize())

        # Ensure that Content-Type is correct if method is post
        if method == 'post':
            headers['Content-Type'] = 'application/x-www-form-urlencoded'

        # Post data
        if data is None:
            data = QByteArray()

        # Request object
        request_obj = QNetworkRequest(QUrl(url))

        # Headers
        for name, value in headers.items():
            request_obj.setRawHeader(name, value)

        # Make a request
        self.view.load(request_obj, method_obj, data)

        loop.exec_()

        if not timer.isActive():
            raise KitError('Timeout while loading %s' % url)

        # Find the resource whose URL is the final main-frame URL,
        # ignoring a trailing slash on either side.
        request_resource = None
        url = str(self.page.mainFrame().url().toString()).rstrip('/')
        for res in self.resource_list:
            if url == res.url or url == res.url.rstrip('/'):
                request_resource = res
                break

        if request_resource:
            return self.build_response(request_resource)
        raise KitError('Request was successful but it is not possible'
                       ' to associate the request to one of received'
                       ' responses')

    def build_response(self, resource):
        """Build a Response object from a received resource.

        :param resource: The resource matched to the main request.
        :return: The populated Response.
        """
        response = Response()
        response.head = ''
        response.code = resource.status_code

        runtime_body = self.page.mainFrame().toHtml()
        body = resource.reply.data
        url = resource.reply.url().toString()
        headers = resource.headers
        cookies = self.get_cookies()

        # py3 hack
        if PY3K:
            if isinstance(body, QByteArray):
                body = body.data()
            headers = decode_dict(headers)
            cookies = decode_dict(cookies)
        else:
            runtime_body = unicode(runtime_body)
            body = str(body)
            url = str(url)

        response.runtime_body = runtime_body.encode('utf-8')
        response.body = body
        response.url = url
        response.parse(charset='utf-8')
        response.headers = headers
        response.cookies = cookies

        return response

    def __del__(self):
        # __init__ may have failed before the view existed; a finalizer
        # must not raise in that case.
        view = getattr(self, 'view', None)
        if view is not None:
            view.setPage(None)

    def network_reply_handler(self, reply):
        """Record every finished reply that carries an HTTP status code.

        :param reply: The finished network reply.
        """
        status_code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
        if status_code:
            if not isinstance(status_code, int):
                # Some bindings hand back a QVariant instead of an int.
                status_code = status_code.toInt()[0]
            logger.debug('Resource loaded: %s [%d]' % (reply.url().toString(),
                                                       status_code))
            self.resource_list.append(Resource(reply))
def allCookies(self):
    """Return every cookie held by this jar.

    Delegates directly to ``QNetworkCookieJar.allCookies``.
    """
    cookies = QNetworkCookieJar.allCookies(self)
    return cookies
class Ghost(object):
    """Ghost manages a QWebPage.

    :param user_agent: The default User-Agent header.
    :param wait_timeout: Maximum step duration in second.
    :param wait_callback: An optional callable that is periodically
        executed until Ghost stops waiting.
    :param log_level: The optional logging level.
    :param display: A boolean that tells ghost to displays UI.
    :param viewport_size: A tuple that sets initial viewport size.
    :param ignore_ssl_errors: A boolean that forces ignore ssl errors.
    :param cache_dir: A directory path where to store cache datas.
    :param plugins_enabled: Enable plugins (like Flash).
    :param java_enabled: Enable Java JRE.
    :param plugin_path: Array with paths to plugin directories
        (default ['/usr/lib/mozilla/plugins'])
    :param download_images: Indicate if the browser should download images
    :param qt_debug: Passed to ``QTMessageProxy`` when the Qt message
        handler is installed.
    """
    # Class-level state shared by every Ghost instance; the nested
    # ``confirm`` / ``prompt`` context managers, ``open`` and
    # ``wait_for_alert`` read and write these on the class.
    _alert = None
    _confirm_expected = None
    _prompt_expected = None
    _upload_file = None
    _app = None

    def __init__(self, user_agent=default_user_agent, wait_timeout=8,
                 wait_callback=None, log_level=logging.WARNING, display=False,
                 viewport_size=(800, 600), ignore_ssl_errors=True,
                 cache_dir=os.path.join(tempfile.gettempdir(), "ghost.py"),
                 plugins_enabled=False, java_enabled=False,
                 plugin_path=['/usr/lib/mozilla/plugins',],
                 download_images=True, qt_debug=False):
        self.http_resources = []

        self.user_agent = user_agent
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.ignore_ssl_errors = ignore_ssl_errors
        self.loaded = True

        if not sys.platform.startswith('win') \
                and 'DISPLAY' not in os.environ \
                and not hasattr(Ghost, 'xvfb'):
            try:
                Ghost.xvfb = subprocess.Popen(['Xvfb', ':99'])
            except OSError:
                raise Error('Xvfb is required to a ghost run outside ' +
                            'an X instance')
            # Only advertise the virtual display once Xvfb really started;
            # otherwise a failed start would leave a bogus DISPLAY behind.
            os.environ['DISPLAY'] = ':99'

        self.display = display

        if not Ghost._app:
            Ghost._app = QApplication.instance() or QApplication(['ghost'])
            qInstallMsgHandler(QTMessageProxy(qt_debug))
            if plugin_path:
                for p in plugin_path:
                    Ghost._app.addLibraryPath(p)

        self.popup_messages = []
        self.page = GhostWebPage(Ghost._app, self)
        QtWebKit.QWebSettings.setMaximumPagesInCache(0)
        QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)
        QtWebKit.QWebSettings.globalSettings().setAttribute(
            QtWebKit.QWebSettings.LocalStorageEnabled, True)

        self.page.setForwardUnsupportedContent(True)
        self.page.settings().setAttribute(
            QtWebKit.QWebSettings.AutoLoadImages, download_images)
        self.page.settings().setAttribute(
            QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled)
        self.page.settings().setAttribute(
            QtWebKit.QWebSettings.JavaEnabled, java_enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        self.page.loadFinished.connect(self._page_loaded)
        self.page.loadStarted.connect(self._page_load_started)
        self.page.unsupportedContent.connect(self._unsupported_content)

        self.manager = self.page.networkAccessManager()
        self.manager.finished.connect(self._request_ended)
        self.manager.sslErrors.connect(self._on_manager_ssl_errors)
        # Cache
        self.cache = QNetworkDiskCache()
        self.cache.setCacheDirectory(cache_dir)
        self.manager.setCache(self.cache)
        # Cookie jar
        self.cookie_jar = QNetworkCookieJar()
        self.manager.setCookieJar(self.cookie_jar)
        # User Agent
        self.page.setUserAgent(self.user_agent)

        self.page.networkAccessManager().authenticationRequired\
            .connect(self._authenticate)
        self.page.networkAccessManager().proxyAuthenticationRequired\
            .connect(self._authenticate)

        self.main_frame = self.page.mainFrame()

        logger.setLevel(log_level)

        if self.display:
            class MyQWebView(QtWebKit.QWebView):
                def sizeHint(self):
                    return QSize(*viewport_size)
            self.webview = MyQWebView()
            if plugins_enabled:
                self.webview.settings().setAttribute(
                    QtWebKit.QWebSettings.PluginsEnabled, True)
            if java_enabled:
                self.webview.settings().setAttribute(
                    QtWebKit.QWebSettings.JavaEnabled, True)
            self.webview.setPage(self.page)
            self.webview.show()
        else:
            self.webview = None

    def __del__(self):
        self.exit()

    def capture(self, region=None, selector=None,
                format=QImage.Format_ARGB32_Premultiplied):
        """Returns snapshot as QImage.

        :param region: An optional tuple containing region as pixel
            coordinates (x1, y1, x2, y2).
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
        """
        if region is None and selector is not None:
            region = self.region_for_selector(selector)
        if region:
            x1, y1, x2, y2 = region
            w, h = (x2 - x1), (y2 - y1)
            # Render from the origin up to the far corner, then crop.
            image = QImage(QSize(x2, y2), format)
            painter = QPainter(image)
            self.main_frame.render(painter)
            painter.end()
            image = image.copy(x1, y1, w, h)
        else:
            # Whole-page capture: hide the scroll bars and grow the
            # viewport to the full content size before rendering.
            self.main_frame.setScrollBarPolicy(QtCore.Qt.Vertical,
                                               QtCore.Qt.ScrollBarAlwaysOff)
            self.main_frame.setScrollBarPolicy(QtCore.Qt.Horizontal,
                                               QtCore.Qt.ScrollBarAlwaysOff)
            self.page.setViewportSize(self.main_frame.contentsSize())
            image = QImage(self.page.viewportSize(), format)
            painter = QPainter(image)
            self.main_frame.render(painter)
            painter.end()
        return image

    def capture_to(self, path, region=None, selector=None,
                   format=QImage.Format_ARGB32_Premultiplied):
        """Saves snapshot as image.

        :param path: The destination path.
        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
        """
        self.capture(region=region, format=format,
                     selector=selector).save(path)

    def print_to_pdf(self,
                     path,
                     paper_size=(8.5, 11.0),
                     paper_margins=(0, 0, 0, 0),
                     paper_units=QPrinter.Inch,
                     zoom_factor=1.0,
                     ):
        """Saves page as a pdf file.

        See qt4 QPrinter documentation for more detailed explanations
        of options.

        :param path: The destination path.
        :param paper_size: A 2-tuple indicating size of page to print to.
        :param paper_margins: A 4-tuple indicating size of each margin.
        :param paper_units: Units for pager_size, pager_margins.
        :param zoom_factor: Scale the output content.
        """
        assert len(paper_size) == 2
        assert len(paper_margins) == 4
        printer = QPrinter(mode=QPrinter.ScreenResolution)
        printer.setOutputFormat(QPrinter.PdfFormat)
        printer.setPaperSize(QtCore.QSizeF(*paper_size), paper_units)
        # tuple() lets callers pass the margins as a list as well.
        printer.setPageMargins(*(tuple(paper_margins) + (paper_units,)))
        printer.setFullPage(True)
        printer.setOutputFileName(path)
        if self.webview is None:
            self.webview = QtWebKit.QWebView()
            self.webview.setPage(self.page)
        self.webview.setZoomFactor(zoom_factor)
        self.webview.print_(printer)

    @can_load_page
    def click(self, selector):
        """Click the targeted element.

        :param selector: A CSS3 selector to targeted element.
        :raises Error: if no element matches the selector.
        """
        if not self.exists(selector):
            raise Error("Can't find element to click")
        return self.evaluate("""
            var element = document.querySelector("%s");
            var evt = document.createEvent("MouseEvents");
            evt.initMouseEvent("click", true, true, window, 1, 1, 1, 1, 1,
                false, false, false, false, 0, element);
            element.dispatchEvent(evt)
        """ % selector)

    class confirm:
        """Statement that tells Ghost how to deal with javascript confirm().

        :param confirm: A boolean to set confirmation.
        :param callable: A callable that returns a boolean for confirmation.
        """
        def __init__(self, confirm=True, callback=None):
            self.confirm = confirm
            self.callback = callback

        def __enter__(self):
            Ghost._confirm_expected = (self.confirm, self.callback)

        def __exit__(self, type, value, traceback):
            Ghost._confirm_expected = None

    @property
    def content(self, to_unicode=True):
        """Returns current frame HTML as a string.

        :param to_unicode: Whether to convert html to unicode or not
        """
        # NOTE(review): attribute access on a property cannot supply
        # ``to_unicode``, so the default is always used — confirm intent.
        if to_unicode:
            return unicode(self.main_frame.toHtml())
        else:
            return self.main_frame.toHtml()

    @property
    def cookies(self):
        """Returns all cookies."""
        return self.cookie_jar.allCookies()

    def delete_cookies(self):
        """Deletes all cookies."""
        self.cookie_jar.setAllCookies([])

    def clear_alert_message(self):
        """Clears the alert message"""
        # The pending alert lives on the class (see wait_for_alert);
        # assigning on the instance would only shadow it.
        Ghost._alert = None

    @can_load_page
    def evaluate(self, script):
        """Evaluates script in page frame.

        :param script: The script to evaluate.
        :return: A tuple (script result, resources loaded so far).
        """
        return (self.main_frame.evaluateJavaScript("%s" % script),
                self._release_last_resources())

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.
        Raises native IOException in case of invalid file.

        :param path: The path of the file.
        :param encoding: The file's encoding.
        """
        # Read inside a ``with`` block so the handle is always released.
        with codecs.open(path, encoding=encoding) as script_file:
            script = script_file.read()
        self.evaluate(script)

    def exists(self, selector):
        """Checks if element exists for given selector.

        :param string: The element selector.
        """
        return not self.main_frame.findFirstElement(selector).isNull()

    def exit(self):
        """Exits application and related.

        Safe to call more than once and on a partially constructed
        instance (it is invoked from ``__del__``).
        """
        webview = getattr(self, 'webview', None)
        if getattr(self, 'display', False) and webview is not None:
            webview.close()
        if Ghost._app is not None:
            Ghost._app.quit()
        # Drop the Qt references; tolerate attributes that were never set
        # or were already removed by a previous exit().
        for name in ('manager', 'page', 'main_frame'):
            self.__dict__.pop(name, None)
        if hasattr(self, 'xvfb'):
            self.xvfb.terminate()

    @can_load_page
    def fill(self, selector, values):
        """Fills a form with provided values.

        :param selector: A CSS selector to the target form to fill.
        :param values: A dict containing the values.
        :return: A tuple (True, resources loaded while filling).
        :raises Error: if the form cannot be found.
        """
        if not self.exists(selector):
            raise Error("Can't find form")
        resources = []
        for field in values:
            field_selector = "%s [name=%s]" % (selector, field)
            field_resources = self.set_field_value(field_selector,
                                                   values[field])[1]
            resources.extend(field_resources)
        return True, resources

    @can_load_page
    def fire_on(self, selector, method):
        """Call method on element matching given selector.

        :param selector: A CSS selector to the target element.
        :param method: The name of the method to fire.
        :param expect_loading: Specifies if a page loading is expected.
        """
        return self.evaluate('document.querySelector("%s").%s();' %
                             (selector, method))

    def global_exists(self, global_name):
        """Checks if javascript global exists.

        :param global_name: The name of the global.
        """
        return self.evaluate('!(typeof %s === "undefined");'
                             % global_name)[0]

    def hide(self):
        """Close the webview.

        :raises Error: if there is no webview to close.
        """
        try:
            self.webview.close()
        except Exception:
            raise Error("no webview to close")

    def load_cookies(self, cookie_storage, keep_old=False):
        """load from cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string on disk or CookieJar
            instance.
        :param keep_old: Don't reset, keep cookies not overridden.
        :raises ValueError: on an unsupported ``cookie_storage`` type.
        """
        def toQtCookieJar(PyCookieJar, QtCookieJar):
            # Start from the cookies already in the Qt jar when asked to
            # keep them, then append the converted ones.
            allCookies = list(QtCookieJar.allCookies()) if keep_old else []
            for pc in PyCookieJar:
                allCookies.append(toQtCookie(pc))
            QtCookieJar.setAllCookies(allCookies)

        def toQtCookie(PyCookie):
            qc = QNetworkCookie(PyCookie.name, PyCookie.value)
            qc.setSecure(PyCookie.secure)
            if PyCookie.path_specified:
                qc.setPath(PyCookie.path)
            if PyCookie.domain != "":
                qc.setDomain(PyCookie.domain)
            # ``expires`` is None for session cookies; only a real
            # timestamp can be turned into an expiration date.
            if PyCookie.expires:
                t = QDateTime()
                t.setTime_t(PyCookie.expires)
                qc.setExpirationDate(t)
            # not yet handled(maybe less useful):
            #   py cookie.rest / QNetworkCookie.setHttpOnly()
            return qc

        if cookie_storage.__class__.__name__ == 'str':
            cj = LWPCookieJar(cookie_storage)
            cj.load()
            toQtCookieJar(cj, self.cookie_jar)
        elif cookie_storage.__class__.__name__.endswith('CookieJar'):
            toQtCookieJar(cookie_storage, self.cookie_jar)
        else:
            raise ValueError('unsupported cookie_storage type.')

    def open(self, address, method='get', headers=None, auth=None,
             body=None, default_popup_response=None):
        """Opens a web page.

        :param address: The resource URL.
        :param method: The Http method.
        :param headers: An optional dict of extra request headers.
        :param auth: An optional tuple of HTTP auth (username, password).
        :param body: An optional string containing a payload.
        :param default_popup_response: the default response for any
            confirm/alert/prompt popup from the Javascript (replaces the
            need for the with blocks)
        :return: Page resource, All loaded resources.
        :raises Error: on an unknown http method.
        """
        body = body or QByteArray()
        headers = headers or {}
        try:
            method = getattr(QNetworkAccessManager,
                             "%sOperation" % method.capitalize())
        except AttributeError:
            raise Error("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        # NOTE(review): this only builds an enum value and discards it; it
        # does not configure the request's cache policy — confirm intent.
        request.CacheLoadControl(0)
        for header in headers:
            request.setRawHeader(header, headers[header])
        self._auth = auth
        self._auth_attempt = 0  # Avoids recursion
        self.main_frame.load(request, method, body)
        self.loaded = False
        # NOTE(review): set unconditionally, so this also replaces any
        # expectation installed by an enclosing ``with Ghost.confirm()`` /
        # ``Ghost.prompt()`` block — confirm this is intended.
        Ghost._prompt_expected = (default_popup_response, None)
        Ghost._confirm_expected = (default_popup_response, None)
        return self.wait_for_page_loaded()

    class prompt:
        """Statement that tells Ghost how to deal with javascript prompt().

        :param value: A string value to fill in prompt.
        :param callback: A callable that returns the value to fill in.
        """
        def __init__(self, value='', callback=None):
            self.value = value
            self.callback = callback

        def __enter__(self):
            Ghost._prompt_expected = (self.value, self.callback)

        def __exit__(self, type, value, traceback):
            Ghost._prompt_expected = None

    def region_for_selector(self, selector):
        """Returns frame region for given selector as tuple.

        :param selector: The targeted element.
        :return: (left, top, right, bottom) of the element geometry.
        :raises Error: if the region cannot be computed.
        """
        geo = self.main_frame.findFirstElement(selector).geometry()
        try:
            region = (geo.left(), geo.top(), geo.right(), geo.bottom())
        except Exception:
            raise Error("can't get region for selector '%s'" % selector)
        return region

    def save_cookies(self, cookie_storage):
        """Save to cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string or CookieJar instance.
        :raises ValueError: on an unsupported ``cookie_storage`` type.
        """
        def toPyCookieJar(QtCookieJar, PyCookieJar):
            for c in QtCookieJar.allCookies():
                PyCookieJar.set_cookie(toPyCookie(c))

        def toPyCookie(QtCookie):
            port = None
            port_specified = False
            secure = QtCookie.isSecure()
            name = str(QtCookie.name())
            value = str(QtCookie.value())
            v = str(QtCookie.path())
            path_specified = bool(v != "")
            path = v if path_specified else None
            v = str(QtCookie.domain())
            domain_specified = bool(v != "")
            domain = v
            domain_initial_dot = v.startswith('.') if domain_specified \
                else None
            v = long(QtCookie.expirationDate().toTime_t())
            # Long type boundary on 32bit platforms; avoid ValueError
            expires = 2147483647 if v > 2147483647 else v
            rest = {}
            discard = False
            return Cookie(0, name, value, port, port_specified, domain,
                          domain_specified, domain_initial_dot, path,
                          path_specified, secure, expires, discard, None,
                          None, rest)

        if cookie_storage.__class__.__name__ == 'str':
            cj = LWPCookieJar(cookie_storage)
            toPyCookieJar(self.cookie_jar, cj)
            cj.save()
        elif cookie_storage.__class__.__name__.endswith('CookieJar'):
            toPyCookieJar(self.cookie_jar, cookie_storage)
        else:
            raise ValueError('unsupported cookie_storage type.')

    @can_load_page
    def set_field_value(self, selector, value, blur=True):
        """Sets the value of the field matched by given selector.

        :param selector: A CSS selector that target the field.
        :param value: The value to fill in.
        :param blur: An optional boolean that force blur when filled in.
        :return: A tuple (result, loaded resources); both are only
            populated by the file-input branch.
        :raises Error: if the element is missing or its tag is not
            supported.
        """
        def _set_checkbox_value(el, value):
            el.setFocus()
            if value is True:
                el.setAttribute('checked', 'checked')
            else:
                el.removeAttribute('checked')

        def _set_checkboxes_value(els, value):
            # Check the box whose value matches, uncheck all the others.
            for el in els:
                if el.attribute('value') == value:
                    _set_checkbox_value(el, True)
                else:
                    _set_checkbox_value(el, False)

        def _set_radio_value(els, value):
            for el in els:
                if el.attribute('value') == value:
                    el.setFocus()
                    el.setAttribute('checked', 'checked')

        def _set_text_value(el, value):
            el.setFocus()
            el.setAttribute('value', value)

        def _set_select_value(el, value):
            el.setFocus()
            # Escape double quotes so they cannot terminate the
            # JavaScript string literals built here.
            self.evaluate('document.querySelector("%s").value = "%s";' %
                          (selector.replace('"', '\\"'),
                           value.replace('"', '\\"')))

        def _set_textarea_value(el, value):
            el.setFocus()
            el.setPlainText(value)

        res, resources = None, []
        element = self.main_frame.findFirstElement(selector)
        if element.isNull():
            raise Error('can\'t find element for %s"' % selector)
        tag_name = element.tagName()
        if tag_name == "SELECT":
            _set_select_value(element, value)
        elif tag_name == "TEXTAREA":
            _set_textarea_value(element, value)
        elif tag_name == "INPUT":
            input_type = element.attribute('type')
            if input_type in ["color", "date", "datetime",
                              "datetime-local", "email", "hidden", "month",
                              "number", "password", "range", "search",
                              "tel", "text", "time", "url", "week"]:
                _set_text_value(element, value)
            elif input_type == "checkbox":
                els = self.main_frame.findAllElements(selector)
                if els.count() > 1:
                    _set_checkboxes_value(els, value)
                else:
                    _set_checkbox_value(element, value)
            elif input_type == "radio":
                _set_radio_value(self.main_frame.findAllElements(selector),
                                 value)
            elif input_type == "file":
                Ghost._upload_file = value
                try:
                    res, resources = self.click(selector)
                finally:
                    # Never leave a pending upload behind if click fails.
                    Ghost._upload_file = None
        else:
            raise Error('unsuported field tag')
        if blur:
            self.fire_on(selector, 'blur')
        return res, resources

    def set_proxy(self, type, host='localhost', port=8888, user='',
                  password=''):
        """Set up proxy for FURTHER connections.

        :param type: proxy type to use: \
            none/default/socks5/https/http.
        :param host: proxy server ip or host name.
        :param port: proxy port.
        :param user: proxy user name.
        :param password: proxy password.
        :raises ValueError: on an unsupported proxy type.
        """
        _types = {
            'default': QNetworkProxy.DefaultProxy,
            'none': QNetworkProxy.NoProxy,
            'socks5': QNetworkProxy.Socks5Proxy,
            'https': QNetworkProxy.HttpProxy,
            'http': QNetworkProxy.HttpCachingProxy,
        }

        if type is None:
            type = 'none'
        type = type.lower()
        if type in ['none', 'default']:
            self.manager.setProxy(QNetworkProxy(_types[type]))
            return
        elif type in _types:
            proxy = QNetworkProxy(_types[type], hostName=host, port=port,
                                  user=user, password=password)
            self.manager.setProxy(proxy)
        else:
            raise ValueError('Unsupported proxy type:' + type
                             + '\nsupported types are: '
                             'none/socks5/http/https/default')

    def set_viewport_size(self, width, height):
        """Sets the page viewport size.

        :param width: An integer that sets width pixel count.
        :param height: An integer that sets height pixel count.
        """
        self.page.setViewportSize(QSize(width, height))

    def append_popup_message(self, message):
        """Stores ``message`` (as a str) in ``popup_messages``.

        :param message: The popup message to record.
        """
        self.popup_messages.append(str(message))

    def show(self):
        """Show current page inside a QWebView.
        """
        self.webview = QtWebKit.QWebView()
        self.webview.setPage(self.page)
        self.webview.show()

    def sleep(self, value):
        """Waits ``value`` seconds while still processing Qt events.

        :param value: The duration to wait, in seconds.
        """
        started_at = time.time()

        while True:
            if time.time() > (started_at + value):
                break

            time.sleep(0.01)
            Ghost._app.processEvents()

    def wait_for(self, condition, timeout_message):
        """Waits until condition is True.

        :param condition: A callable that returns the condition.
        :param timeout_message: The exception message on timeout.
        :raises TimeoutError: when ``wait_timeout`` seconds have elapsed.
        """
        started_at = time.time()
        while not condition():
            if time.time() > (started_at + self.wait_timeout):
                raise TimeoutError(timeout_message)
            time.sleep(0.01)
            # Let Qt deliver pending signals so the condition can change.
            Ghost._app.processEvents()
            if self.wait_callback is not None:
                self.wait_callback()

    def wait_for_alert(self):
        """Waits for main frame alert().

        :return: A tuple (alert message, loaded resources).
        """
        self.wait_for(lambda: Ghost._alert is not None,
                      'User has not been alerted.')
        msg = Ghost._alert
        Ghost._alert = None
        return msg, self._release_last_resources()

    def wait_for_page_loaded(self):
        """Waits until page is loaded, assumed that a page as been requested.

        :return: A tuple (page resource or None, all loaded resources).
        """
        self.wait_for(lambda: self.loaded,
                      'Unable to load requested page')
        resources = self._release_last_resources()
        page = None

        url = self.main_frame.url().toString()
        url_without_hash = url.split("#")[0]

        for resource in resources:
            # No early exit: the last matching resource wins.
            if url == resource.url or url_without_hash == resource.url:
                page = resource
        return page, resources

    def wait_for_selector(self, selector):
        """Waits until selector match an element on the frame.

        :param selector: The selector to wait for.
        """
        self.wait_for(lambda: self.exists(selector),
                      'Can\'t find element matching "%s"' % selector)
        return True, self._release_last_resources()

    def wait_for_text(self, text):
        """Waits until given text appear on main frame.

        :param text: The text to wait for.
        """
        self.wait_for(lambda: text in self.content,
                      'Can\'t find "%s" in current frame' % text)
        return True, self._release_last_resources()

    def _authenticate(self, mix, authenticator):
        """Called back on basic / proxy http auth.

        :param mix: The QNetworkReply or QNetworkProxy object.
        :param authenticator: The QAuthenticator object.
        """
        # Credentials are offered once per open(); nothing to offer when
        # open() was called without ``auth`` (or not called at all).
        credentials = getattr(self, '_auth', None)
        if credentials and getattr(self, '_auth_attempt', 0) == 0:
            username, password = credentials
            authenticator.setUser(username)
            authenticator.setPassword(password)
            self._auth_attempt = 1

    def _page_loaded(self):
        """Called back when page is loaded.
        """
        self.loaded = True
        self.cache.clear()

    def _page_load_started(self):
        """Called back when page load started.
        """
        self.loaded = False

    def _release_last_resources(self):
        """Releases last loaded resources.

        :return: The released resources.
        """
        last_resources = self.http_resources
        self.http_resources = []
        return last_resources

    def _request_ended(self, reply):
        """Adds an HttpResource object to http_resources.

        :param reply: The QNetworkReply object.
        """
        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            Logger.log("[%s] bytesAvailable()= %s" % (str(reply.url()),
                       reply.bytesAvailable()), level="debug")

            # Some web pages return cache headers that mandates not to cache
            # the reply, which means we won't find this QNetworkReply in the
            # cache object. In this case bytesAvailable will return > 0.
            # Such pages are www.etsy.com
            # This is a bit of a hack and due to the async nature of QT,
            # might not work at times. We should move to using some proxied
            # implementation of QNetworkManager and QNetworkReply in order to
            # get the contents of the requests properly rather than relying
            # on the cache.
            if reply.bytesAvailable() > 0:
                content = reply.peek(reply.bytesAvailable())
            else:
                content = None
            self.http_resources.append(HttpResource(reply, self.cache,
                                                    content=content))

    def _unsupported_content(self, reply):
        """Adds an HttpResource object to http_resources with unsupported
        content.

        :param reply: The QNetworkReply object.
        """
        self.wait_for(lambda: reply.isFinished(), 'Download timeout.')
        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            self.http_resources.append(HttpResource(reply, self.cache,
                                                    reply.readAll()))

    def _on_manager_ssl_errors(self, reply, errors):
        """Called back on SSL errors: ignore them or log a warning.

        :param reply: The QNetworkReply object.
        :param errors: The reported SSL errors (unused).
        """
        if self.ignore_ssl_errors:
            reply.ignoreSslErrors()
        else:
            url = unicode(reply.url().toString())
            Logger.log('SSL certificate error: %s' % url, level='warning')
class Ghost(object):
    """Ghost manages a QWebPage.

    :param user_agent: The default User-Agent header.
    :param wait_timeout: Maximum step duration in second.
    :param wait_callback: An optional callable that is periodically
        executed until Ghost stops waiting.
    :param log_level: The optional logging level.
    :param display: A boolean that tells ghost to displays UI.
    :param viewport_size: A tuple that sets initial viewport size.
    :param ignore_ssl_errors: A boolean that forces ignore ssl errors.
    :param cache_dir: A directory path where to store cache datas.
    :param plugins_enabled: Enable plugins (like Flash).
    :param java_enabled: Enable Java JRE.
    :param plugin_path: Sequence of paths to plugin directories
        (default ('/usr/lib/mozilla/plugins',)).
    :param download_images: Indicate if the browser should download images.
    """
    # Class-level state shared with the web page callbacks (alert/confirm/
    # prompt/file chooser) and the single QApplication instance.
    _alert = None
    _confirm_expected = None
    _prompt_expected = None
    _upload_file = None
    _app = None

    def __init__(self, user_agent=default_user_agent, wait_timeout=8,
                 wait_callback=None, log_level=logging.WARNING, display=False,
                 viewport_size=(800, 600), ignore_ssl_errors=True,
                 cache_dir=os.path.join(tempfile.gettempdir(), "ghost.py"),
                 plugins_enabled=False, java_enabled=False,
                 plugin_path=('/usr/lib/mozilla/plugins',),
                 download_images=True):
        self.http_resources = []
        self.user_agent = user_agent
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.ignore_ssl_errors = ignore_ssl_errors
        self.loaded = True

        # Outside Windows, start a virtual X server once per process when
        # no DISPLAY is available.
        if not sys.platform.startswith('win') \
                and 'DISPLAY' not in os.environ \
                and not hasattr(Ghost, 'xvfb'):
            try:
                os.environ['DISPLAY'] = ':99'
                Ghost.xvfb = subprocess.Popen(['Xvfb', ':99'])
            except OSError:
                raise Exception('Xvfb is required to a ghost run oustside ' +
                                'an X instance')

        self.display = display

        if not Ghost._app:
            Ghost._app = QApplication.instance() or QApplication(['ghost'])
            if plugin_path:
                for p in plugin_path:
                    Ghost._app.addLibraryPath(p)

        self.page = GhostWebPage(Ghost._app)
        QtWebKit.QWebSettings.setMaximumPagesInCache(0)
        QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)
        QtWebKit.QWebSettings.globalSettings().setAttribute(
            QtWebKit.QWebSettings.LocalStorageEnabled, True)

        self.page.setForwardUnsupportedContent(True)
        page_settings = self.page.settings()
        page_settings.setAttribute(
            QtWebKit.QWebSettings.AutoLoadImages, download_images)
        page_settings.setAttribute(
            QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled)
        page_settings.setAttribute(
            QtWebKit.QWebSettings.JavaEnabled, java_enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        self.page.loadFinished.connect(self._page_loaded)
        self.page.loadStarted.connect(self._page_load_started)
        self.page.unsupportedContent.connect(self._unsupported_content)

        self.manager = self.page.networkAccessManager()
        self.manager.finished.connect(self._request_ended)
        self.manager.sslErrors.connect(self._on_manager_ssl_errors)

        # Cache
        self.cache = QNetworkDiskCache()
        self.cache.setCacheDirectory(cache_dir)
        self.manager.setCache(self.cache)

        # Cookie jar
        self.cookie_jar = QNetworkCookieJar()
        self.manager.setCookieJar(self.cookie_jar)

        # User Agent
        self.page.setUserAgent(self.user_agent)

        self.page.networkAccessManager().authenticationRequired\
            .connect(self._authenticate)
        self.page.networkAccessManager().proxyAuthenticationRequired\
            .connect(self._authenticate)

        self.main_frame = self.page.mainFrame()

        logger.setLevel(log_level)

        if self.display:
            self.webview = QtWebKit.QWebView()
            if plugins_enabled:
                self.webview.settings().setAttribute(
                    QtWebKit.QWebSettings.PluginsEnabled, True)
            if java_enabled:
                self.webview.settings().setAttribute(
                    QtWebKit.QWebSettings.JavaEnabled, True)
            self.webview.setPage(self.page)
            self.webview.show()

    def __del__(self):
        self.exit()

    @staticmethod
    def _js_quote(value):
        """Escapes value for use inside a double-quoted javascript string.

        :param value: The text to embed (typically a CSS selector).
        :return: The text with backslashes and double quotes escaped.
        """
        return value.replace('\\', '\\\\').replace('"', '\\"')

    def capture(self, region=None, selector=None,
                format=QImage.Format_ARGB32_Premultiplied):
        """Returns snapshot as QImage.

        :param region: An optional tuple containing region as pixel
            coordinates (x1, y1, x2, y2).
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
        """
        if region is None and selector is not None:
            region = self.region_for_selector(selector)

        if region:
            x1, y1, x2, y2 = region
            # Render up to the bottom-right corner, then crop below.
            size = QSize(x2, y2)
        else:
            size = self.page.viewportSize()

        image = QImage(size, format)
        painter = QPainter(image)
        self.main_frame.render(painter)
        painter.end()

        if region:
            image = image.copy(x1, y1, x2 - x1, y2 - y1)
        return image

    def capture_to(self, path, region=None, selector=None,
                   format=QImage.Format_ARGB32_Premultiplied):
        """Saves snapshot as image.

        :param path: The destination path.
        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
        """
        self.capture(region=region, format=format,
                     selector=selector).save(path)

    @can_load_page
    def click(self, selector):
        """Click the targeted element.

        :param selector: A CSS3 selector to targeted element.
        :raises Exception: If no element matches the selector.
        """
        if not self.exists(selector):
            raise Exception("Can't find element to click")
        # The selector is escaped so that quotes/backslashes reach
        # querySelector() unchanged instead of breaking the script.
        return self.evaluate("""
            var element = document.querySelector("%s");
            var evt = document.createEvent("MouseEvents");
            evt.initMouseEvent("click", true, true, window, 1, 1, 1, 1, 1,
                false, false, false, false, 0, element);
            element.dispatchEvent(evt)
        """ % self._js_quote(selector))

    class confirm:
        """Statement that tells Ghost how to deal with javascript confirm().

        :param confirm: A boolean that confirm.
        :param callable: A callable that returns a boolean for confirmation.
        """
        def __init__(self, confirm=True, callback=None):
            self.confirm = confirm
            self.callback = callback

        def __enter__(self):
            Ghost._confirm_expected = (self.confirm, self.callback)

        def __exit__(self, type, value, traceback):
            Ghost._confirm_expected = None

    @property
    def content(self):
        """Returns current frame HTML as a string."""
        return unicode(self.main_frame.toHtml())

    @property
    def cookies(self):
        """Returns all cookies."""
        return self.cookie_jar.allCookies()

    def delete_cookies(self):
        """Deletes all cookies."""
        self.cookie_jar.setAllCookies([])

    @can_load_page
    def evaluate(self, script):
        """Evaluates script in page frame.

        :param script: The script to evaluate.
        :return: A tuple (script result, released resources).
        """
        return (self.main_frame.evaluateJavaScript("%s" % script),
                self._release_last_resources())

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.
        Raises native IOException in case of invalid file.

        :param path: The path of the file.
        :param encoding: The file's encoding.
        """
        # Context manager so the file handle is closed deterministically.
        with codecs.open(path, encoding=encoding) as script_file:
            script = script_file.read()
        self.evaluate(script)

    def exists(self, selector):
        """Checks if element exists for given selector.

        :param string: The element selector.
        """
        return not self.main_frame.findFirstElement(selector).isNull()

    def exit(self):
        """Exits application and relateds.

        Safe to call more than once: __del__ calls it again after an
        explicit exit(), and after an __init__ that failed half-way.
        """
        if getattr(self, 'display', False) and hasattr(self, 'webview'):
            self.webview.close()
        if Ghost._app is not None:
            Ghost._app.quit()
        for name in ('manager', 'page', 'main_frame'):
            if hasattr(self, name):
                delattr(self, name)
        if hasattr(self, 'xvfb'):
            self.xvfb.terminate()

    @can_load_page
    def fill(self, selector, values):
        """Fills a form with provided values.

        :param selector: A CSS selector to the target form to fill.
        :param values: A dict containing the values.
        :return: A tuple (True, resources collected while filling).
        :raises Exception: If the form cannot be found.
        """
        if not self.exists(selector):
            raise Exception("Can't find form")
        resources = []
        for field in values:
            r, res = self.set_field_value(
                "%s [name=%s]" % (selector, field), values[field])
            resources.extend(res)
        return True, resources

    @can_load_page
    def fire_on(self, selector, method):
        """Call method on element matching given selector.

        :param selector: A CSS selector to the target element.
        :param method: The name of the method to fire.
        :param expect_loading: Specifies if a page loading is expected.
        """
        return self.evaluate('document.querySelector("%s").%s();' %
                             (self._js_quote(selector), method))

    def global_exists(self, global_name):
        """Checks if javascript global exists.

        :param global_name: The name of the global.
        """
        return self.evaluate('!(typeof %s === "undefined");' %
                             global_name)[0]

    def hide(self):
        """Close the webview.

        :raises Exception: If there is no webview to close.
        """
        try:
            self.webview.close()
        except Exception:
            raise Exception("no webview to close")

    def open(self, address, method='get', headers=None, auth=None,
             body=None):
        """Opens a web page.

        :param address: The resource URL.
        :param method: The Http method.
        :param headers: An optional dict of extra request headers.
        :param auth: An optional tuple of HTTP auth (username, password).
        :param body: An optional string containing a payload.
        :return: Page resource, All loaded resources.
        :raises Exception: If the http method is not supported.
        """
        headers = headers or {}
        body = body or QByteArray()
        try:
            method = getattr(QNetworkAccessManager,
                             "%sOperation" % method.capitalize())
        except AttributeError:
            raise Exception("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        # NOTE(review): this only builds an enum value and discards it; it
        # does not appear to change the request's cache policy. Kept as-is
        # pending confirmation of the intended policy.
        request.CacheLoadControl(0)
        for header in headers:
            request.setRawHeader(header, headers[header])
        self._auth = auth
        self._auth_attempt = 0  # Avoids recursion
        self.main_frame.load(request, method, body)
        self.loaded = False
        return self.wait_for_page_loaded()

    class prompt:
        """Statement that tells Ghost how to deal with javascript prompt().

        :param value: A string value to fill in prompt.
        :param callback: A callable that returns the value to fill in.
        """
        def __init__(self, value='', callback=None):
            self.value = value
            self.callback = callback

        def __enter__(self):
            Ghost._prompt_expected = (self.value, self.callback)

        def __exit__(self, type, value, traceback):
            Ghost._prompt_expected = None

    def region_for_selector(self, selector):
        """Returns frame region for given selector as tuple.

        :param selector: The targeted element.
        :return: A tuple (left, top, right, bottom).
        :raises Exception: If the region cannot be computed.
        """
        geo = self.main_frame.findFirstElement(selector).geometry()
        try:
            region = (geo.left(), geo.top(), geo.right(), geo.bottom())
        except Exception:
            raise Exception("can't get region for selector '%s'" % selector)
        return region

    @can_load_page
    def set_field_value(self, selector, value, blur=True):
        """Sets the value of the field matched by given selector.

        :param selector: A CSS selector that target the field.
        :param value: The value to fill in.
        :param blur: An optional boolean that force blur when filled in.
        :return: A tuple (result, resources); both are only populated for
            file inputs, where they come from the click on the field.
        :raises Exception: If the element is missing or its tag is not
            SELECT, TEXTAREA or INPUT.
        """
        def _set_checkbox_value(el, value):
            el.setFocus()
            if value is True:
                el.setAttribute('checked', 'checked')
            else:
                el.removeAttribute('checked')

        def _set_checkboxes_value(els, value):
            for el in els:
                _set_checkbox_value(el, el.attribute('value') == value)

        def _set_radio_value(els, value):
            for el in els:
                if el.attribute('value') == value:
                    el.setFocus()
                    el.setAttribute('checked', 'checked')

        def _set_text_value(el, value):
            el.setFocus()
            el.setAttribute('value', value)

        def _set_textarea_value(el, value):
            el.setFocus()
            el.setPlainText(value)

        text_types = ["color", "date", "datetime", "datetime-local",
                      "email", "hidden", "month", "number", "password",
                      "range", "search", "tel", "text", "time", "url",
                      "week"]

        res, resources = None, []
        element = self.main_frame.findFirstElement(selector)
        if element.isNull():
            raise Exception('can\'t find element for %s"' % selector)

        tag = element.tagName()
        if tag == "SELECT":
            _set_text_value(element, value)
        elif tag == "TEXTAREA":
            _set_textarea_value(element, value)
        elif tag == "INPUT":
            field_type = element.attribute('type')
            if field_type in text_types:
                _set_text_value(element, value)
            elif field_type == "checkbox":
                els = self.main_frame.findAllElements(selector)
                if els.count() > 1:
                    _set_checkboxes_value(els, value)
                else:
                    _set_checkbox_value(element, value)
            elif field_type == "radio":
                _set_radio_value(
                    self.main_frame.findAllElements(selector), value)
            elif field_type == "file":
                # The file chooser callback reads Ghost._upload_file; reset
                # it even if the click fails.
                Ghost._upload_file = value
                try:
                    res, resources = self.click(selector)
                finally:
                    Ghost._upload_file = None
        else:
            raise Exception('unsuported field tag')

        if blur:
            self.fire_on(selector, 'blur')
        return res, resources

    def set_viewport_size(self, width, height):
        """Sets the page viewport size.

        :param width: An integer that sets width pixel count.
        :param height: An integer that sets height pixel count.
        """
        self.page.setViewportSize(QSize(width, height))

    def show(self):
        """Show current page inside a QWebView."""
        self.webview = QtWebKit.QWebView()
        self.webview.setPage(self.page)
        self.webview.show()

    def wait_for(self, condition, timeout_message):
        """Waits until condition is True.

        :param condition: A callable that returns the condition.
        :param timeout_message: The exception message on timeout.
        :raises Exception: When wait_timeout seconds elapsed.
        """
        deadline = time.time() + self.wait_timeout
        while not condition():
            if time.time() > deadline:
                raise Exception(timeout_message)
            time.sleep(0.01)
            # Let Qt deliver pending signals while we poll.
            Ghost._app.processEvents()
            if self.wait_callback is not None:
                self.wait_callback()

    def wait_for_alert(self):
        """Waits for main frame alert().

        :return: A tuple (alert message, released resources).
        """
        self.wait_for(lambda: Ghost._alert is not None,
                      'User has not been alerted.')
        msg = Ghost._alert
        Ghost._alert = None
        return msg, self._release_last_resources()

    def wait_for_page_loaded(self):
        """Waits until page is loaded, assumed that a page as been requested.

        :return: A tuple (page resource or None, released resources).
        """
        self.wait_for(lambda: self.loaded, 'Unable to load requested page')
        resources = self._release_last_resources()
        page = None
        url = self.main_frame.url().toString()
        # Also compare without the fragment part of the frame URL.
        url_without_hash = url.split("#")[0]
        for resource in resources:
            if url == resource.url or url_without_hash == resource.url:
                page = resource
        return page, resources

    def wait_for_selector(self, selector):
        """Waits until selector match an element on the frame.

        :param selector: The selector to wait for.
        """
        self.wait_for(lambda: self.exists(selector),
                      'Can\'t find element matching "%s"' % selector)
        return True, self._release_last_resources()

    def wait_for_text(self, text):
        """Waits until given text appear on main frame.

        :param text: The text to wait for.
        """
        self.wait_for(lambda: text in self.content,
                      'Can\'t find "%s" in current frame' % text)
        return True, self._release_last_resources()

    def _authenticate(self, mix, authenticator):
        """Called back on basic / proxy http auth.

        :param mix: The QNetworkReply or QNetworkProxy object.
        :param authenticator: The QAuthenticator object.
        """
        # Without credentials there is nothing to answer with.
        if self._auth is None:
            return
        if self._auth_attempt == 0:
            username, password = self._auth
            authenticator.setUser(username)
            authenticator.setPassword(password)
            self._auth_attempt += 1

    def _page_loaded(self):
        """Called back when page is loaded."""
        self.loaded = True
        self.cache.clear()

    def _page_load_started(self):
        """Called back when page load started."""
        self.loaded = False

    def _release_last_resources(self):
        """Releases last loaded resources.

        :return: The released resources.
        """
        last_resources = self.http_resources
        self.http_resources = []
        return last_resources

    def _request_ended(self, reply):
        """Adds an HttpResource object to http_resources.

        :param reply: The QNetworkReply object.
        """
        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            self.http_resources.append(HttpResource(reply, self.cache))

    def _unsupported_content(self, reply):
        """Adds an HttpResource object to http_resources with unsupported
        content.

        :param reply: The QNetworkReply object.
        """
        self.wait_for(lambda: reply.isFinished(), 'Download timeout.')
        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            self.http_resources.append(
                HttpResource(reply, self.cache, reply.readAll()))

    def _on_manager_ssl_errors(self, reply, errors):
        """Called back on SSL errors; ignores them when configured to.

        :param reply: The QNetworkReply object.
        :param errors: The SSL errors reported by the manager.
        """
        url = unicode(reply.url().toString())
        if self.ignore_ssl_errors:
            reply.ignoreSslErrors()
        else:
            Logger.log('SSL certificate error: %s' % url, level='warning')
class GRobot(object):
    """Drives a QWebPage displayed in a QWebView."""

    # State shared by every robot in the process.
    _loop = None
    _liveRobot = 0
    _app = None
    exit_lock = RLock()

    def __init__(self, user_agent=default_user_agent, operate_timeout=10,
                 loading_timeout=60, log_level=logging.WARNING,
                 display=False, viewport_size=(1024, 768),
                 accept_language='en,*', ignore_ssl_errors=True,
                 cache_dir=os.path.join(tempfile.gettempdir(), "GRobot"),
                 image_enabled=True, plugins_enabled=False,
                 java_enabled=False, javascript_enabled=True,
                 plugin_path=None, develop=False, proxy=None, sleep=0.5,
                 jquery_namespace='GRobot'):
        """GRobot manages a QWebPage.

        @param user_agent: The default User-Agent header.
        @param operate_timeout: Operation timeout.
        @param loading_timeout: The page loading timeout.
        @param log_level: The optional logging level.
        @param display: A boolean that tells GRobot to displays UI.
        @param viewport_size: A tuple that sets initial viewport size.
        @param accept_language: Set the webkit accept language.
        @param ignore_ssl_errors: A boolean that forces ignore ssl errors.
        @param cache_dir: A directory path where to store cache datas.
        @param image_enabled: Enable images.
        @param plugins_enabled: Enable plugins (like Flash).
        @param java_enabled: Enable Java JRE.
        @param javascript_enabled: Enable Javascript.
        @param plugin_path: Array with paths to plugin directories
            (default ['/usr/lib/mozilla/plugins'])
        @param develop: Enable the Webkit Inspector.
        @param proxy: Set a Socks5,HTTP{S} Proxy
        @param sleep: Sleep `sleep` second,after operate
        @param jquery_namespace: Set the jQuery namespace.
        """
        # `with` guarantees the lock is released even when construction
        # fails (for example when Xvfb cannot be started).
        with GRobot.exit_lock:
            logger.setLevel(log_level)

            plugin_path = plugin_path or ['/usr/lib/mozilla/plugins', ]

            GRobot._liveRobot += 1

            self.develop = develop
            self.inspector = None
            self.plugin = False
            self.exitLoop = False

            self.sleep = sleep
            self.jquery_namespace = jquery_namespace
            self.popup_messages = None
            self.accept_language = accept_language

            self._loaded = True

            self._confirm_expected = None
            self._prompt_expected = None
            self._upload_file = None
            self._alert = None

            self.http_resources = []

            self.user_agent = user_agent

            self.loading_timeout = loading_timeout
            self.operate_timeout = operate_timeout

            self.ignore_ssl_errors = ignore_ssl_errors

            # Outside Windows, start a virtual X server once per process
            # when no DISPLAY is available.
            if not sys.platform.startswith('win') \
                    and 'DISPLAY' not in os.environ \
                    and not hasattr(GRobot, 'xvfb'):
                try:
                    os.environ['DISPLAY'] = ':99'
                    GRobot.xvfb = subprocess.Popen(['Xvfb', ':99'])
                except OSError:
                    raise Exception(
                        'Xvfb is required to a GRobot run oustside ' +
                        'an X instance')

            self.display = display

            if not GRobot._app:
                GRobot._app = QApplication.instance() \
                    or QApplication(['GRobot'])
                if plugin_path:
                    for p in plugin_path:
                        GRobot._app.addLibraryPath(p)

            self.page = GRobotWebPage(self, GRobot._app)

            QtWebKit.QWebSettings.setMaximumPagesInCache(0)
            QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)
            QtWebKit.QWebSettings.globalSettings().setAttribute(
                QtWebKit.QWebSettings.LocalStorageEnabled, True)

            self.page.setForwardUnsupportedContent(True)

            # Page signals
            self.page.loadFinished.connect(self._page_loaded)
            self.page.loadStarted.connect(self._page_load_started)
            self.page.unsupportedContent.connect(self._unsupported_content)

            self.manager = self.page.networkAccessManager()

            # set_proxy() configures the page's network access manager, so
            # it can only run once the page exists.
            self.set_proxy(proxy)

            #TODO:Think about how to handle the network accessible signal
            #self.manager.networkAccessibleChanged.connect()

            self.manager.finished.connect(self._request_ended)
            self.manager.sslErrors.connect(self._on_manager_ssl_errors)

            # Cache
            self.cache = QNetworkDiskCache()
            self.cache.setCacheDirectory(cache_dir)
            self.manager.setCache(self.cache)

            # Cookie jar
            self.cookie_jar = QNetworkCookieJar()
            self.manager.setCookieJar(self.cookie_jar)

            # User Agent
            self.page.setUserAgent(self.user_agent)

            self.page.networkAccessManager().authenticationRequired \
                .connect(self._authenticate)
            self.page.networkAccessManager().proxyAuthenticationRequired \
                .connect(self._authenticate)

            self.main_frame = self.page.mainFrame()

            self.viewport_size = viewport_size

            # The view always exists; `display` only decides visibility.
            self.webview = QtWebKit.QWebView()
            self.webview.setPage(self.page)
            if display:
                self.webview.show()
            else:
                self.webview.hide()

            self.set_viewport_size(*viewport_size)

            page_settings = self.page.settings()
            page_settings.setAttribute(
                QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled)
            page_settings.setAttribute(
                QtWebKit.QWebSettings.JavaEnabled, java_enabled)
            page_settings.setAttribute(
                QWebSettings.DeveloperExtrasEnabled, self.develop)

            self.enable_image = image_enabled
            self.enable_javascript = javascript_enabled

            #always open link in current window instead of new window
            self.page.setLinkDelegationPolicy(QWebPage.DelegateAllLinks)
            self.page.linkClicked.connect(self._link_clicked)

            #start the qt main loop
            GRobot._loop = QtMainLoop(GRobot._app)
            GRobot._loop.start()

    @property
    def popup_messages(self):
        """Returns the stored popup messages."""
        return self._popup_messages

    @popup_messages.setter
    def popup_messages(self, value):
        # The value is always stored as its unicode representation.
        self._popup_messages = unicode(value)

    @property
    def url(self):
        """Returns the main frame URL as a unicode string."""
        return unicode(self.main_frame.url().toString())

    @property
    def content(self):
        """Returns current frame HTML as a string."""
        return unicode(self.page.currentFrame().toHtml())

    @property
    def cookies(self):
        """Returns all cookies."""
        return self.cookie_jar.allCookies()

    @property
    def enable_image(self):
        """Disable the page images can speed up page loading."""
        return self._enable_image

    @enable_image.setter
    def enable_image(self, value):
        self.page.settings().setAttribute(QWebSettings.AutoLoadImages, value)
        self._enable_image = value

    #TODO:It seems not work?
# @enable_image.deleter # def enable_image(self): # raise NotImplemented @property def enable_javascript(self): """Disable the page javascript can speed up page loading. """ return self._enable_javascript @enable_javascript.setter def enable_javascript(self, value): self.page.settings().setAttribute(QWebSettings.JavascriptEnabled, value) self._enable_javascript = value def open(self, address, method='get', headers=None, auth=None, body=None, default_popup_response=None): """Opens a web page. @param address: The resource URL. @param method: The Http method. @param headers: An optional dict of extra request hearders. @param auth: An optional tupple of HTTP auth (username, password). @param body: An optional string containing a payload. @param default_popup_response: the default response for any confirm/ alert/prompt popup from the Javascript (replaces the need for the with blocks) """ headers = headers or {} body = body or QByteArray() try: method = getattr(QNetworkAccessManager, "%sOperation" % method.capitalize()) except AttributeError: raise Exception("Invalid http method %s" % method) request = QNetworkRequest(QUrl(address)) request.CacheLoadControl = 0 for header in headers: request.setRawHeader(header, headers[header]) self._auth = auth self._auth_attempt = 0 # Avoids reccursion self.page.mainFrame().load(request, method, body) self._loaded = False if default_popup_response is not None: self._prompt_expected = (default_popup_response, None) self._confirm_expected = (default_popup_response, None) return self.wait_for_page_loaded() def set_viewport_size(self, width, height): """Sets the page viewport size. @param width: An integer that sets width pixel count. @param height: An integer that sets height pixel count. """ if self.display: self.webview.resize(QSize(width, height)) self.page.setViewportSize(QSize(width, height)) def set_proxy(self, proxy=None): """Set the proxy or using system configuration as None,supported socks5 http{s}. 
@param proxy: Example:socks5://username:[email protected]:7070 """ proxy_type = None if proxy: parse = urlparse(proxy) scheme = parse.scheme hostname = parse.hostname port = parse.port username = parse.username or '' password = parse.password or '' if scheme == 'socks5': proxy_type = QNetworkProxy.Socks5Proxy elif scheme in ('http', 'https'): proxy_type = QNetworkProxy.HttpProxy if proxy_type: self.page.networkAccessManager().setProxy( QNetworkProxy(proxy_type, hostname, port, username, password) ) else: QNetworkProxyFactory.setUseSystemConfiguration(True) def first_element_position(self, selector): try: return self.elements_position(selector)[0] except IndexError: logger.warning("Can't locate selector " + selector) return None def elements_position(self, selector): """Get the position of elements whose match selector @param selector: @return: position of QPoint """ attr, pattern, val = self.parser_selector(selector, attr='identifier') strip = lambda v: v.strip() if pattern: val = locals()[pattern](val) def identifier(query): return id(query) or name(query) def name(query): return css("*[name='%s']" % query) def id(query): return css('#' + query) def link(query): return xpath(u"//a[@text()='%s']" % query.replace("\'", "\\'")) def css(query): result = [] for ele in self.main_frame.findAllElements(query): if not ele.isNull(): result.append(ele.geometry().center()) return result def xpath(query): positions = self.evaluate(u""" function GetAbsoluteLocationEx(element) { if ( arguments.length != 1 || element == null ) { return null; } var elmt = element; var offsetTop = elmt.offsetTop; var offsetLeft = elmt.offsetLeft; var offsetWidth = elmt.offsetWidth; var offsetHeight = elmt.offsetHeight; while( elmt = elmt.offsetParent ) { // add this judge if ( elmt.style.position == 'absolute' || elmt.style.position == 'relative' || ( elmt.style.overflow != 'visible' && elmt.style.overflow != '' ) ) { break; } offsetTop += elmt.offsetTop; offsetLeft += elmt.offsetLeft; } return { 
absoluteTop: offsetTop, absoluteLeft: offsetLeft, offsetWidth: offsetWidth, offsetHeight: offsetHeight }; } result=[]; for (var r = document.evaluate('%s', document, null, 5, null), n; n = r.iterateNext();) { pos=GetAbsoluteLocationEx(n) result.push([pos.absoluteLeft+pos.offsetWidth/2.0,pos.absoluteTop+pos.offsetHeight/2.0]); } result """ % query.replace("\'", "\\'")) return map(lambda x: QPoint(*tuple(x)), positions) return locals()[attr](val) def _move_page_center_to(self, qpoint): size = self.page.viewportSize() self.main_frame.setScrollPosition(qpoint - QPoint(size.width(), size.height()) / 2) def reload(self): """Reload page. @return: """ self.trigger_action('Reload', expect_loading=True) def back(self): self.trigger_action('Back') def forward(self): self.trigger_action('Forward') @can_load_page def trigger_action(self, action): """Trigger QWebPage::WebAction @param action: """ self.page.triggerAction(getattr(QWebPage, action)) def parser_selector(self, selector, attr=None, pattern=None, val=None): index = selector.find('=') if index <= 0: val = selector else: attr = selector[:index] value_ = selector[index + 1:] index = value_.find(':') if index > 0: pattern = value_[:index] val = value_[index + 1:] return attr, pattern, val @can_load_page @have_a_break def click(self, selector): qpoint = self.first_element_position(selector) if qpoint: return self._click_position(qpoint) @can_load_page def _click_position(self, qpoint): self._move_page_center_to(qpoint) self.webview.repaint() pos = qpoint - self.main_frame.scrollPosition() self._move_to_position(pos) QTest.mouseClick(self.webview, Qt.LeftButton, pos=pos) gevent.sleep(1) return pos def qpoint_to_tuple(self, qpoint): return qpoint.x(), qpoint.y() @have_a_break def move_to(self, selector): qpoint = self.first_element_position(selector) if qpoint: self._move_to_position(qpoint) return qpoint_to_tuple(qpoint) def move_at(self, x, y): self._move_to_position(QPoint(x, y)) def _move_to_position(self, qpoint): 
QTest.mouseMove(self.webview, pos=qpoint) return qpoint @have_a_break def click_at(self, x, y): self._click_position(QPoint(x, y)) @have_a_break def key_clicks(self, selector, text): if selector: self.click(selector) QTest.keyClicks(self.webview, text, delay=50) @have_a_break def type(self, selector, text): position = self.click(selector) ele = self._hit_element_from(position) ele.setFocus() ele.evaluateJavaScript( u""" core.events.setValue(this, '%s') """ % (text.replace("\n", "\\n").replace("\'", "\\'")) ) logger.debug('type %s %s' % (selector, text)) def _hit_element_from(self, position): return self.main_frame.hitTestContent(position).element() def first_element(self, selector): position = self.first_element_position(selector) if position: return self.main_frame.hitTestContent(position).element(), position def wait_forever(self): self.wait_for(lambda: False, time_for_stop=-1) @have_a_break def check(self, selector, checked=True): ele, position = self.first_element(selector) if ele and ele.tagName() == 'INPUT': if ele.attribute('type') in ['checkbox', 'radio']: ele_checked = ele.attribute('checked') == 'checked' or False if ele_checked != checked: self._click_position(position) else: raise ValueError, "%s is not a checkbox or radio" % selector @have_a_break def select(self, selector, value): def _select(query, select_by, select): select.evaluateJavaScript(u""" triggerEvent(this, 'focus', false); var changed = false; var optionToSelect = '%s'; for (var i = 0; i < this.options.length; i++) { var option = this.options[i]; if (option.selected && option.%s != optionToSelect) { option.selected = false; changed = true; } else if (!option.selected && option.%s == optionToSelect) { option.selected = true; changed = true; } } if (changed) { triggerEvent(this, 'change', true); } """ % ( query.replace("\'", "\\'"), select_by, select_by)) def _add_selection(query, select_by, select, selected): select.evaluateJavaScript(u""" triggerEvent(this, 'focus', false); var 
optionToSelect = '%s'; for (var i = 0; i < this.options.length; i++) { var option = this.options[i]; if (option.%s == optionToSelect) { option.selected = %s; triggerEvent(this, 'change', true); } } """ % ( query.replace("\'", "\\'"), select_by, selected and 'true' or 'false')) ele, position = self.first_element(selector) if ele and ele.tagName() == 'SELECT': ele.setFocus() if ele.attribute('multiple') == 'multiple': assert isinstance(value, list) for value_, selected in value: attr, pattern, val = self.parser_selector(value_, attr='text') _add_selection(val, attr, ele, selected) else: attr, pattern, val = self.parser_selector(value, attr='text') _select(val, attr, ele) def choose_file(self, selector, file): self._upload_file = file self.click(selector) self._upload_file = None def capture(self, selector=None): """Capture the images of selector. @param selector: Css selector. @return: Images """ elements = self.main_frame.documentElement().findAll(selector) imgs = [] for element in elements: geo = element.geometry() img = QImage(geo.width(), geo.height(), QImage.Format_ARGB32) painter = QPainter(img) element.render(painter) painter.end() imgs.append(img) return imgs def capture_to(self, path, selector=None): """Capture the images of selector to files. @param path: File path with index suffix. @param selector: Css selector. @return: The paths of saving. """ _, ext = os.path.splitext(path) ext = ext[1:] imgs = self.capture(selector) result = [] for index, img in enumerate(imgs): filepath = '%s.%s' % (path, index) if img.save(filepath, ext.upper()): result.append(filepath) return result def capture_to_buf(self, selector=None): """capture the images of selector to StringIO @param selector: Css selector. @return: The StringIO list. 
""" images = self.capture(selector) result = [] for image in images: ba = QByteArray() buf = QBuffer(ba) buf.open(QIODevice.ReadWrite) image.save(buf, 'jpg') stream = StringIO(str(buf.buffer())) result.append(stream) return result @can_load_page def evaluate(self, script): """Evaluates script in page frame. @param script: The script to evaluate. """ result = self.main_frame.evaluateJavaScript("%s" % script) # if isinstance(result,QString): # result=unicode(result) return result def evaluate_js_file(self, path, encoding='utf-8'): """Evaluates javascript file at given path in current frame. Raises native IOException in case of invalid file. @param path: The path of the file. @param encoding: The file's encoding. """ self.evaluate(codecs.open(path, encoding=encoding).read()) def __del__(self): """Depend on the CG of Python. """ self._exit() def delete_cookies(self): """Deletes all cookies.""" self.cookie_jar.setAllCookies([]) def exists(self, selector): """Checks if element exists for given selector. @param string: The element selector. """ return not self.main_frame.findFirstElement(selector).isNull() #TODO: Still not work. # def remove_css(self): # """Remore the css,speed up page loading. # # @return: # """ # # return self.evaluate("""var targetelement="link";//determine element type to create nodelist from # var targetattr="href"//determine corresponding attribute to test for # var allsuspects=document.getElementsByTagName(targetelement) # for (var i=allsuspects.length; i>=0; i--){ //search backwards within nodelist for matching elements to remove # if (allsuspects[i] && allsuspects[i].getAttribute(targetattr)!=null ) # allsuspects[i].parentNode.removeChild(allsuspects[i]); //remove element by calling parentNode.removeChild() # } # """) def filter_resources(self, pattern): """Filter resources with pattern. @param pattern: Match pattern. 
@param resources: @return: @raise: """ if isinstance(pattern, basestring): is_match = lambda x: pattern == x elif isinstance(pattern, _pattern_type): is_match = lambda x: pattern.match(x) elif hasattr(pattern, '__call__'): is_match = pattern else: raise TypeError, 'pattern must be one of str,re.compile,callable' return filter(lambda x: is_match(x.request_url), self.http_resources)[:] def save(self, path): """Save current page content to the path. @param path: The path to save. """ f = open(path, 'w') f.write(self.content.encode('utf-8')) f.close() def global_exists(self, global_name): """Checks if javascript global exists. @param global_name: The name of the global. """ return self.evaluate('!(typeof %s === "undefined");' % global_name) def load_cookies( self, cookie_storage, keep_old=False ): """load from cookielib's CookieJar or Set-Cookie3 format text file. @param cookie_storage: file location string on disk or CookieJar instance. @param keep_old: Don't reset, keep cookies not overridden. """ def toQtCookieJar( PyCookieJar, QtCookieJar ): allCookies = QtCookieJar.cookies if keep_old else [] for pc in PyCookieJar: qc = toQtCookie(pc) allCookies.append(qc) QtCookieJar.setAllCookies(allCookies) def toQtCookie(PyCookie): qc = QNetworkCookie(PyCookie.name, PyCookie.value) qc.setSecure(PyCookie.secure) if PyCookie.path_specified: qc.setPath(PyCookie.path) if PyCookie.domain != "": qc.setDomain(PyCookie.domain) if PyCookie.expires != 0: t = QDateTime() t.setTime_t(PyCookie.expires) qc.setExpirationDate(t) # not yet handled(maybe less useful): # py cookie.rest / QNetworkCookie.setHttpOnly() return qc if cookie_storage.__class__.__name__ == 'str': cj = LWPCookieJar(cookie_storage) cj.load() toQtCookieJar(cj, self.cookie_jar) elif cookie_storage.__class__.__name__.endswith('CookieJar'): toQtCookieJar(cookie_storage, self.cookie_jar) else: raise ValueError, 'unsupported cookie_storage type.' 
def save_cookies(self, cookie_storage): """Save to cookielib's CookieJar or Set-Cookie3 format text file. @param cookie_storage: file location string or CookieJar instance. """ def toPyCookieJar(QtCookieJar, PyCookieJar): for c in QtCookieJar.allCookies(): PyCookieJar.set_cookie(toPyCookie(c)) def toPyCookie(QtCookie): port = None port_specified = False secure = QtCookie.isSecure() name = str(QtCookie.name()) value = str(QtCookie.value()) v = str(QtCookie.path()) path_specified = bool(v != "") path = v if path_specified else None v = str(QtCookie.domain()) domain_specified = bool(v != "") domain = v domain_initial_dot = v.startswith('.') if domain_specified else None v = long(QtCookie.expirationDate().toTime_t()) # Long type boundary on 32bit platfroms; avoid ValueError expires = 2147483647 if v > 2147483647 else v rest = {} discard = False return Cookie(0, name, value, port, port_specified, domain , domain_specified, domain_initial_dot, path, path_specified , secure, expires, discard, None, None, rest) if cookie_storage.__class__.__name__ == 'str': cj = LWPCookieJar(cookie_storage) toPyCookieJar(self.cookie_jar, cj) cj.save() elif cookie_storage.__class__.__name__.endswith('CookieJar'): toPyCookieJar(self.cookie_jar, cookie_storage) else: raise ValueError, 'unsupported cookie_storage type.' def wait_for_confirm(self, confirm=True, callback=None): """Statement that tells GRobot how to deal with javascript confirm(). @param confirm: A bollean that confirm. @param callable: A callable that returns a boolean for confirmation. """ self._robot._confirm_expected = (confirm, callback) self._robot.wait_for(lambda: self._robot._confirm_expected is None) return self.popup_messages def wait_for_text(self, text, time_for_stop=None): """Waits until given text appear on main frame. @param text: The text to wait for. 
@return: """ logger.debug("Wait for text %s" % text) self.wait_for(lambda: text in self.content, "Can\'t find '%s' in current frame" % text, time_for_stop=time_for_stop) return self.wait_for_page_loaded() def wait_for_xpath(self, expression, time_for_stop=None): self.wait_for(lambda: XPath(self.content).execute(expression), "Can't find xpath=%s in current frame" % expression, time_for_stop=time_for_stop) return self.wait_for_page_loaded() def wait_for_selector(self, selector): """Waits until selector match an element on the frame. @param selector: The selector to wait for. """ self.wait_for(lambda: self.exists(selector), 'Can\'t find element matching "%s"' % selector) def wait_for_page_loaded(self, time_for_stop=None): """Waits until page is loaded, assumed that a page as been requested. """ return self.wait_for(lambda: self._loaded, 'Unable to load requested page', time_for_stop=time_for_stop) def wait_for(self, condition, timeout_message='', time_for_stop=None): """Waits until condition is True. @param condition: A callable that returns the condition. @param timeout_message: The exception message on timeout.-1 means never timeout. """ if self._loaded: time_for_stop = time_for_stop or self.operate_timeout else: time_for_stop = time_for_stop or self.loading_timeout started_at = time.time() while not condition(): if time_for_stop != -1 and time.time() > (started_at + time_for_stop): if self._loaded: raise OperateTimeout, timeout_message else: # raise LoadingTimeout, timeout_message self.trigger_action('Stop') #QWebPage::Stop self._loaded = True logger.warning("Page loading timeout.Force to stop the page") break gevent.sleep(2) def wait_for_alert(self): """Waits for main frame alert(). """ self.wait_for(lambda: self._alert is not None, 'User has not been alerted.') msg, self._alert = self._alert, None return msg def _release_last_resources(self): """Releases last loaded resources. :return: The released resources. 
""" last_resources, self.http_resources = self.http_resources[:], [] return last_resources def _page_loaded(self, success): if self.develop and self.display: if self.inspector is None: self.inspector = QWebInspector() self.inspector.setPage(self.page) self.inspector.show() scripts = [ 'atoms.js', 'htmlutils.js', ] if self.jquery_namespace: scripts.append('jquery-1.9.1.min.js', ) for script in scripts: self.evaluate_js_file(os.path.dirname(__file__) + '/javascripts/' + script) if self.jquery_namespace: self.evaluate(u"%s=jQuery.noConflict();" % self.jquery_namespace) self._loaded = True # self.cache.clear() logger.debug("Page load finished") def _page_load_started(self): logger.debug("Start load page") self._loaded = False def _unsupported_content(self, reply): """Adds an HttpResource object to http_resources with unsupported content. @param reply: The QNetworkReply object. """ if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute): self.http_resources.append(HttpResource(reply, self.cache, reply.readAll())) def _link_clicked(self, href): """Contorl the page link clicked event,forbid open new window. @param href: The href attribute of a tag. """ self.main_frame.load(href) def _request_ended(self, reply): """Adds an HttpResource object to http_resources. @param reply: The QNetworkReply object. """ if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute): self.http_resources.append(HttpResource(reply, self.cache)) def _authenticate(self, mix, authenticator): """Called back on basic / proxy http auth. @param mix: The QNetworkReply or QNetworkProxy object. @param authenticator: The QAuthenticator object. 
""" if self._auth_attempt == 0: username, password = self._auth authenticator.setUser(username) authenticator.setPassword(password) self._auth_attempt += 1 def _on_manager_ssl_errors(self, reply, errors): """Ingore all the ssl error @param reply: @param errors: """ url = unicode(reply.url().toString()) if self.ignore_ssl_errors: reply.ignoreSslErrors() else: logger.warning('SSL certificate error: %s' % url) def _exit(self): """Destroy the Qt main event loop. """ GRobot.exit_lock.acquire() if self.inspector: self.inspector.close() sip.delete(self.inspector) if self.display: self.webview.close() sip.delete(self.webview) if self.page and not sip.isdeleted(self.page): sip.delete(self.page) GRobot._liveRobot -= 1 if GRobot._liveRobot == 0 and GRobot._loop is not None: GRobot._loop.stop() GRobot._loop = None GRobot._app = None if hasattr(self, 'xvfb'): GRobot.xvfb.terminate() GRobot.exit_lock.release()
class GRobot(object):
    """Drive a headless (or displayed) QtWebKit page from gevent code.

    Class attributes are shared by every instance:

    * ``_loop``      -- the running ``QtMainLoop`` (or None).
    * ``_liveRobot`` -- number of instances that have not been exited yet.
    * ``_app``       -- the process wide ``QApplication``.
    * ``_kill_loop`` -- pending greenlet that will stop the Qt loop, if any.
    * ``exit_lock``  -- re-entrant lock serialising construction and exit.
    """

    _loop = None
    _liveRobot = 0
    _app = None
    _kill_loop = None
    exit_lock = RLock()

    def __init__(self, user_agent=default_user_agent, operate_timeout=10,
                 loading_timeout=60, log_level=logging.WARNING, display=False,
                 viewport_size=(1024, 768), accept_language='en,*',
                 ignore_ssl_errors=True,
                 cache_dir=os.path.join(tempfile.gettempdir(), "GRobot"),
                 image_enabled=True, plugins_enabled=False, java_enabled=False,
                 javascript_enabled=True, plugin_path=None, develop=False,
                 proxy=None, sleep=0.5, jquery_namespace='GRobot'):
        """GRobot manages a QWebPage.

        @param user_agent: The default User-Agent header.
        @param operate_timeout: Operation timeout.
        @param loading_timeout: The page loading timeout.
        @param log_level: The optional logging level.
        @param display: A boolean that tells GRobot to display the UI.
        @param viewport_size: A tuple that sets the initial viewport size.
        @param accept_language: Set the webkit accept language.
        @param ignore_ssl_errors: A boolean that forces ignoring ssl errors.
        @param cache_dir: A directory path where to store cache data.
        @param image_enabled: Enable images.
        @param plugins_enabled: Enable plugins (like Flash).
        @param java_enabled: Enable Java JRE.
        @param javascript_enabled: Enable Javascript.
        @param plugin_path: List of paths to plugin directories
            (default ['/usr/lib/mozilla/plugins']).
        @param develop: Enable the Webkit Inspector.
        @param proxy: Set a Socks5 or HTTP(S) proxy.
        @param sleep: Number of seconds to sleep after an operation.
        @param jquery_namespace: Set the jQuery namespace.
        @raise Exception: When no X display is available and Xvfb cannot
            be started.
        """
        # ``with`` guarantees the lock is released even when construction
        # fails half way (e.g. Xvfb is missing).
        with GRobot.exit_lock:
            # A new robot cancels a pending shutdown of the shared Qt loop.
            if GRobot._kill_loop:
                gevent.kill(GRobot._kill_loop)
                GRobot._kill_loop = None

            logger.setLevel(log_level)

            plugin_path = plugin_path or ['/usr/lib/mozilla/plugins', ]

            GRobot._liveRobot += 1

            self.develop = develop
            self.inspector = None
            self.plugin = False
            self.exitLoop = False
            self._deleted = False

            self.sleep = sleep
            self.jquery_namespace = jquery_namespace
            # NOTE: the property setter stringifies its value, so this
            # stores the string 'None'.
            self.popup_messages = None
            self.accept_language = accept_language

            self._loaded = True

            self._confirm_expected = None
            self._prompt_expected = None
            self._upload_file = None
            self._alert = None

            self.http_resources = []

            self.user_agent = user_agent

            self.loading_timeout = loading_timeout
            self.operate_timeout = operate_timeout

            self.ignore_ssl_errors = ignore_ssl_errors

            # Outside Windows, start a virtual framebuffer once per process
            # when no X display is available.
            if not sys.platform.startswith('win') \
                    and 'DISPLAY' not in os.environ \
                    and not hasattr(GRobot, 'xvfb'):
                try:
                    os.environ['DISPLAY'] = ':99'
                    GRobot.xvfb = subprocess.Popen(['Xvfb', ':99'])
                except OSError:
                    raise Exception('Xvfb is required to a GRobot run oustside ' +
                                    'an X instance')

            self.display = display

            if not GRobot._app:
                GRobot._app = QApplication.instance() or QApplication(['GRobot'])
                if plugin_path:
                    for p in plugin_path:
                        GRobot._app.addLibraryPath(p)

            self.page = GRobotWebPage(self, GRobot._app)

            QtWebKit.QWebSettings.setMaximumPagesInCache(0)
            QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)
            QtWebKit.QWebSettings.globalSettings().setAttribute(
                QtWebKit.QWebSettings.LocalStorageEnabled, True)

            self.page.setForwardUnsupportedContent(True)

            # Page signals
            self.page.loadFinished.connect(self._page_loaded)
            self.page.loadStarted.connect(self._page_load_started)
            self.page.unsupportedContent.connect(self._unsupported_content)

            self.manager = self.page.networkAccessManager()

            # The proxy is installed on the page's network access manager,
            # so it can only be configured once the page exists.
            self.set_proxy(proxy)

            #TODO:Think about how to handle the network accessible signal
            #self.manager.networkAccessibleChanged.connect()

            self.manager.finished.connect(self._request_ended)
            self.manager.sslErrors.connect(self._on_manager_ssl_errors)

            # Cache
            self.cache = QNetworkDiskCache()
            self.cache.setCacheDirectory(cache_dir)
            self.manager.setCache(self.cache)

            # Cookie jar
            self.cookie_jar = QNetworkCookieJar()
            self.manager.setCookieJar(self.cookie_jar)

            # User Agent
            self.page.setUserAgent(self.user_agent)

            self.page.networkAccessManager().authenticationRequired \
                .connect(self._authenticate)
            self.page.networkAccessManager().proxyAuthenticationRequired \
                .connect(self._authenticate)

            self.main_frame = self.page.mainFrame()

            self.webview = None

            self.viewport_size = viewport_size

            # The view is always created (mouse/keyboard events are sent to
            # it); it is merely hidden when ``display`` is false.
            self.webview = QtWebKit.QWebView()
            self.webview.setPage(self.page)

            if display:
                self.webview.show()
            else:
                self.webview.hide()

            self.set_viewport_size(*viewport_size)

            self.page.settings().setAttribute(
                QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled)
            self.page.settings().setAttribute(
                QtWebKit.QWebSettings.JavaEnabled, java_enabled)
            self.page.settings().setAttribute(
                QWebSettings.DeveloperExtrasEnabled, self.develop)

            self.enable_image = image_enabled
            self.enable_javascript = javascript_enabled

            #always open link in current window instead of new window
            self.page.setLinkDelegationPolicy(QWebPage.DelegateAllLinks)
            self.page.linkClicked.connect(self._link_clicked)

            #start the qt main loop
            GRobot._loop = QtMainLoop(GRobot._app)
            GRobot._loop.start()

    @property
    def popup_messages(self):
        """The last popup message, stored as a string."""
        return self._popup_messages

    @popup_messages.setter
    def popup_messages(self, value):
        self._popup_messages = str(value)

    @property
    def url(self):
        """The URL of the main frame as a string."""
        return str(self.main_frame.url().toString())

    def content(self):
        """Returns current frame HTML as a string."""
        return str(self.main_frame.toHtml())

    @property
    def cookies(self):
        """Returns all cookies."""
        return self.cookie_jar.allCookies()

    @property
    def enable_image(self):
        """Disable the page images can speed up page loading.
        """
        return self._enable_image

    @enable_image.setter
    def enable_image(self, value):
        self.page.settings().setAttribute(QWebSettings.AutoLoadImages, value)
        self._enable_image = value

    #TODO:It seems not work?
    # @enable_image.deleter
    # def enable_image(self):
    #     raise NotImplemented

    @property
    def enable_javascript(self):
        """Disable the page javascript can speed up page loading.
        """
        return self._enable_javascript

    @enable_javascript.setter
    def enable_javascript(self, value):
        self.page.settings().setAttribute(QWebSettings.JavascriptEnabled, value)
        self._enable_javascript = value

    def open(self, address, method='get', headers=None, auth=None, body=None,
             default_popup_response=None):
        """Opens a web page.

        @param address: The resource URL.
        @param method: The Http method.
        @param headers: An optional dict of extra request headers.
        @param auth: An optional tuple of HTTP auth (username, password).
        @param body: An optional string containing a payload.
        @param default_popup_response: the default response for any confirm/
            alert/prompt popup from the Javascript (replaces the need for the
            with blocks)
        @raise Exception: When ``method`` is not a known HTTP operation.
        """
        headers = headers or {}
        body = body or QByteArray()
        try:
            method = getattr(QNetworkAccessManager,
                             "%sOperation" % method.capitalize())
        except AttributeError:
            raise Exception("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        # NOTE(review): this only sets an attribute on the Python wrapper;
        # presumably it was meant to configure cache load control -- confirm.
        request.CacheLoadControl = 0
        for header in headers:
            request.setRawHeader(header, headers[header])
        self._auth = auth
        self._auth_attempt = 0  # Avoids reccursion
        self.page.mainFrame().load(request, method, body)
        self._loaded = False

        if default_popup_response is not None:
            self._prompt_expected = (default_popup_response, None)
            self._confirm_expected = (default_popup_response, None)

        return self.wait_for_page_loaded()

    def set_viewport_size(self, width, height):
        """Sets the page viewport size.

        @param width: An integer that sets width pixel count.
        @param height: An integer that sets height pixel count.
        """
        if self.display:
            self.webview.resize(QSize(width, height))
        self.page.setViewportSize(QSize(width, height))

    def set_proxy(self, proxy=None):
        """Set the proxy, or use the system configuration when None.

        Supported schemes are socks5 and http{s}; any other scheme falls
        back to the system configuration. Requires ``self.page`` to exist
        when a proxy is given.

        @param proxy: Example: socks5://username:password@127.0.0.1:7070
        """
        proxy_type = None
        if proxy:
            parse = urlparse(proxy)
            scheme = parse.scheme
            hostname = parse.hostname
            port = parse.port
            username = parse.username or ''
            password = parse.password or ''

            if scheme == 'socks5':
                proxy_type = QNetworkProxy.Socks5Proxy
            elif scheme in ('http', 'https'):
                proxy_type = QNetworkProxy.HttpProxy

        if proxy_type is not None:
            self.page.networkAccessManager().setProxy(
                QNetworkProxy(proxy_type, hostname, port, username, password)
            )
        else:
            QNetworkProxyFactory.setUseSystemConfiguration(True)

    def first_element_position(self, selector):
        """Return the position of the first element matching selector.

        @param selector: The element selector.
        @return: A QPoint, or None (after logging a warning) when nothing
            matches.
        """
        try:
            return self.elements_position(selector)[0]
        except IndexError:
            logger.warning("Can't locate selector " + selector)
            return None

    def elements_position(self, selector):
        """Get the position of the elements matching selector.

        The selector has the form ``[attr=][modifier:]value`` where ``attr``
        is one of identifier (default), id, name, link, css or xpath and the
        only modifier is ``strip``. A prefix before ``:`` that is not a known
        modifier is treated as part of the value, so selectors such as
        ``css=a:hover`` keep working.

        @param selector: The element selector.
        @return: A list of QPoint (centers of the matched elements).
        @raise KeyError: When ``attr`` is not a supported locator.
        """
        attr, pattern, val = self.parser_selector(selector, attr='identifier')

        def css(query):
            result = []
            for ele in self.main_frame.findAllElements(query):
                if not ele.isNull():
                    result.append(ele.geometry().center())
            return result

        def xpath(query):
            positions = self.evaluate("""
            function GetAbsoluteLocationEx(element)
            {
                if ( arguments.length != 1 || element == null )
                {
                    return null;
                }
                var elmt = element;
                var offsetTop = elmt.offsetTop;
                var offsetLeft = elmt.offsetLeft;
                var offsetWidth = elmt.offsetWidth;
                var offsetHeight = elmt.offsetHeight;
                while( elmt = elmt.offsetParent )
                {
                    // add this judge
                    if ( elmt.style.position == 'absolute' || elmt.style.position == 'relative'
                        || ( elmt.style.overflow != 'visible' && elmt.style.overflow != '' ) )
                    {
                        break;
                    }
                    offsetTop += elmt.offsetTop;
                    offsetLeft += elmt.offsetLeft;
                }
                return { absoluteTop: offsetTop, absoluteLeft: offsetLeft,
                    offsetWidth: offsetWidth, offsetHeight: offsetHeight };
            }
            result=[];
            for (var r = document.evaluate('%s', document, null, 5, null), n; n = r.iterateNext();) {
                pos=GetAbsoluteLocationEx(n);
                result.push([pos.absoluteLeft+pos.offsetWidth/2.0,pos.absoluteTop+pos.offsetHeight/2.0]);
            }
            result
            """ % query.replace("\'", "\\'"))
            # The script yields nothing when evaluation fails.
            return [QPoint(*tuple(x)) for x in positions or []]

        def by_id(query):
            return css('#' + query)

        def by_name(query):
            return css("*[name='%s']" % query)

        def identifier(query):
            # Try the id first, then fall back to the name attribute.
            return by_id(query) or by_name(query)

        def link(query):
            return xpath("//a[text()='%s']" % query.replace("\'", "\\'"))

        # Explicit dispatch tables instead of looking names up in locals().
        modifiers = {'strip': lambda v: v.strip()}
        locators = {
            'identifier': identifier,
            'id': by_id,
            'name': by_name,
            'link': link,
            'css': css,
            'xpath': xpath,
        }

        if pattern:
            modifier = modifiers.get(pattern)
            if modifier is None:
                # Not a modifier: the colon belonged to the value itself.
                val = '%s:%s' % (pattern, val)
            else:
                val = modifier(val)

        return locators[attr](val)

    def _move_page_center_to(self, qpoint):
        """Scroll the main frame so that qpoint sits at the viewport center."""
        size = self.page.viewportSize()
        self.main_frame.setScrollPosition(
            qpoint - QPoint(size.width(), size.height()) / 2)

    def reload(self):
        """Reload page.

        @return:
        """
        self.trigger_action('Reload', expect_loading=True)

    def back(self):
        """Trigger the 'Back' web action."""
        self.trigger_action('Back')

    def forward(self):
        """Trigger the 'Forward' web action."""
        self.trigger_action('Forward')

    @can_load_page
    def trigger_action(self, action):
        """Trigger QWebPage::WebAction

        @param action: Name of a QWebPage web action, e.g. 'Stop'.
        """
        self.page.triggerAction(getattr(QWebPage, action))

    def parser_selector(self, selector, attr=None, pattern=None, val=None):
        """Split ``[attr=][pattern:]value`` into its three parts.

        Without an ``=`` (or with a leading one) the whole selector is the
        value and the given defaults are returned for attr and pattern.

        @param selector: The selector string to parse.
        @param attr: Default attribute when the selector names none.
        @param pattern: Default pattern when the selector names none.
        @param val: Default value.
        @return: A tuple (attr, pattern, val).
        """
        index = selector.find('=')
        if index <= 0:
            val = selector
        else:
            attr = selector[:index]
            value_ = selector[index + 1:]
            index = value_.find(':')
            if index > 0:
                pattern = value_[:index]
            # With no colon index is -1, so this keeps the whole value.
            val = value_[index + 1:]

        return attr, pattern, val

    @can_load_page
    @have_a_break
    def click(self, selector):
        """Click the first element matching selector.

        @param selector: The element selector.
        @return: The clicked (x, y) position, or None when nothing matches.
        """
        qpoint = self.first_element_position(selector)
        if qpoint:
            return self.qpoint_to_tuple(self._click_position(qpoint))

    @can_load_page
    @have_a_break
    def test(self):
        return self.qpoint_to_tuple(QPoint(1, 2))

    @can_load_page
    def _click_position(self, qpoint):
        """Scroll to qpoint, then send a left click at its view position.

        @param qpoint: The target position as a QPoint.
        @return: The position relative to the scrolled view.
        """
        self._move_page_center_to(qpoint)
        self.webview.repaint()
        pos = qpoint - self.main_frame.scrollPosition()

        self._move_to_position(pos)
        QTest.mouseClick(self.webview, Qt.LeftButton, pos=pos)
        gevent.sleep(1)
        return pos

    def qpoint_to_tuple(self, qpoint):
        """Convert a QPoint into an (x, y) tuple."""
        return qpoint.x(), qpoint.y()

    @have_a_break
    def move_to(self, selector):
        """Move the mouse over the first element matching selector.

        @param selector: The element selector.
        @return: The (x, y) position, or None when nothing matches.
        """
        qpoint = self.first_element_position(selector)
        if qpoint:
            self._move_to_position(qpoint)
            # qpoint_to_tuple is a method, not a module level function.
            return self.qpoint_to_tuple(qpoint)

    def move_at(self, x, y):
        """Move the mouse to the (x, y) view position."""
        return self._move_to_position(QPoint(x, y))

    def _move_to_position(self, qpoint):
        QTest.mouseMove(self.webview, pos=qpoint)
        return qpoint

    @can_load_page
    @have_a_break
    def click_at(self, x, y):
        """Click at the (x, y) position."""
        return self._click_position(QPoint(x, y))

    @have_a_break
    def key_clicks(self, selector, text):
        """Send text as key clicks, clicking selector first when given.

        @param selector: Optional selector of the element to click first.
        @param text: The text to send.
        """
        if selector:
            self.click(selector)
        QTest.keyClicks(self.webview, text, delay=50)

    @have_a_break
    def type(self, selector, text):
        """Click the element matching selector and set its value to text.

        @param selector: The element selector.
        @param text: The value to set.
        """
        position = self.click(selector)

        ele = self._hit_element_from(QPoint(*position))

        ele.setFocus()
        ele.evaluateJavaScript(
            """
            core.events.setValue(this, '%s')
            """ % (text.replace("\n", "\\n").replace("\'", "\\'"))
        )
        logger.debug('type %s %s' % (selector, text))

    def _hit_element_from(self, position):
        return self.main_frame.hitTestContent(position).element()

    def first_element(self, selector):
        """Return the first element matching selector and its position.

        @param selector: The element selector.
        @return: A tuple (element, position), or None when nothing matches.
        """
        position = self.first_element_position(selector)
        if position:
            return self.main_frame.hitTestContent(position).element(), position

    def wait_forever(self):
        """Block without timeout."""
        self.wait_for(lambda: False, time_for_stop=-1)

    @have_a_break
    def check(self, selector, checked=True):
        """Set the checked state of a checkbox or radio input.

        Does nothing when no INPUT element matches selector.

        @param selector: The element selector.
        @param checked: The wanted state.
        @raise ValueError: When the input is not a checkbox or radio.
        """
        # first_element returns None when nothing matches.
        ele, position = self.first_element(selector) or (None, None)
        if ele and ele.tagName() == 'INPUT':
            if ele.attribute('type') in ['checkbox', 'radio']:
                ele_checked = ele.attribute('checked') == 'checked' or False
                if ele_checked != checked:
                    self._click_position(position)
            else:
                raise ValueError("%s is not a checkbox or radio" % selector)

    @have_a_break
    def select(self, selector, value):
        """Select option(s) of a SELECT element.

        Does nothing when no SELECT element matches selector.

        @param selector: The selector of the SELECT element.
        @param value: For a single select, an option selector
            (``[attr=]value``, attr defaults to text). For a multiple
            select, a list of (option selector, selected) pairs.
        """

        def _select(query, select_by, select):
            select.evaluateJavaScript("""
            triggerEvent(this, 'focus', false);
            var changed = false;
            var optionToSelect = '%s';
            for (var i = 0; i < this.options.length; i++) {
                var option = this.options[i];
                if (option.selected && option.%s != optionToSelect) {
                    option.selected = false;
                    changed = true;
                }
                else if (!option.selected && option.%s == optionToSelect) {
                    option.selected = true;
                    changed = true;
                }
            }

            if (changed) {
                triggerEvent(this, 'change', true);
            }
            """ % ( query.replace("\'", "\\'"), select_by, select_by))

        def _add_selection(query, select_by, select, selected):
            select.evaluateJavaScript("""
            triggerEvent(this, 'focus', false);
            var optionToSelect = '%s';
            for (var i = 0; i < this.options.length; i++) {
                var option = this.options[i];
                if (option.%s == optionToSelect)
                {
                    option.selected = %s;
                    triggerEvent(this, 'change', true);
                }
            }
            """ % ( query.replace("\'", "\\'"), select_by,
                    selected and 'true' or 'false'))

        # first_element returns None when nothing matches.
        ele, position = self.first_element(selector) or (None, None)

        if ele and ele.tagName() == 'SELECT':
            ele.setFocus()

            if ele.attribute('multiple') == 'multiple':
                assert isinstance(value, list)
                for value_, selected in value:
                    attr, pattern, val = self.parser_selector(value_, attr='text')
                    _add_selection(val, attr, ele, selected)
            else:
                attr, pattern, val = self.parser_selector(value, attr='text')
                _select(val, attr, ele)

    def choose_file(self, selector, file):
        """Click selector while ``_upload_file`` is set to file.

        @param selector: The selector of the element to click.
        @param file: The value stored in ``_upload_file`` during the click.
        """
        self._upload_file = file
        self.click(selector)
        self._upload_file = None

    def capture(self, selector=None):
        """Capture the images of selector.

        @param selector: Css selector.
        @return: Images
        """
        elements = self.main_frame.documentElement().findAll(selector)
        imgs = []

        for element in elements:
            geo = element.geometry()
            img = QImage(geo.width(), geo.height(), QImage.Format_ARGB32)
            painter = QPainter(img)
            element.render(painter)
            painter.end()
            imgs.append(img)

        return imgs

    def capture_to(self, path, selector=None):
        """Capture the images of selector to files.

        @param path: File path with index suffix.
        @param selector: Css selector.
        @return: The paths of saving.
        """
        _, ext = os.path.splitext(path)
        ext = ext[1:]

        imgs = self.capture(selector)
        result = []
        for index, img in enumerate(imgs):
            filepath = '%s.%s' % (path, index)
            if img.save(filepath, ext.upper()):
                result.append(filepath)

        return result

    def capture_to_buf(self, selector=None):
        """capture the images of selector to StringIO

        @param selector: Css selector.
        @return: The StringIO list.
        """
        images = self.capture(selector)
        result = []

        for image in images:
            ba = QByteArray()
            buf = QBuffer(ba)
            buf.open(QIODevice.ReadWrite)
            image.save(buf, 'jpg')
            stream = StringIO(str(buf.buffer()))
            result.append(stream)

        return result

    @can_load_page
    def evaluate(self, script):
        """Evaluates script in page frame.

        @param script: The script to evaluate.
        @return: The value returned by the frame's evaluateJavaScript.
        """
        result = self.main_frame.evaluateJavaScript("%s" % script)
        # if isinstance(result,QString):
        #     result=unicode(result)
        return result

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.
        Raises native IOException in case of invalid file.

        @param path: The path of the file.
        @param encoding: The file's encoding.
        """
        # Close the file deterministically instead of leaking the handle.
        with codecs.open(path, encoding=encoding) as script_file:
            script = script_file.read()
        self.evaluate(script)

    def __del__(self):
        """Depend on the CG of Python.
        """
        self.exit()

    def delete_cookies(self):
        """Deletes all cookies."""
        self.cookie_jar.setAllCookies([])

    def exists(self, selector):
        """Checks if element exists for given selector.

        @param selector: The element selector.
        """
        return not self.main_frame.findFirstElement(selector).isNull()

    #TODO: Still not work.
    #    def remove_css(self):
    #        """Remore the css,speed up page loading.
    #
    #        @return:
    #        """
    #
    #        return self.evaluate("""var targetelement="link";//determine element type to create nodelist from
    #        var targetattr="href"//determine corresponding attribute to test for
    #        var allsuspects=document.getElementsByTagName(targetelement)
    #        for (var i=allsuspects.length; i>=0; i--){ //search backwards within nodelist for matching elements to remove
    #        if (allsuspects[i] && allsuspects[i].getAttribute(targetattr)!=null )
    #            allsuspects[i].parentNode.removeChild(allsuspects[i]); //remove element by calling parentNode.removeChild()
    #        }
    #        """)

    def filter_resources(self, pattern):
        """Filter resources with pattern.

        @param pattern: A string compared for equality with the request
            url, a compiled regular expression matched against it, or a
            callable receiving it.
        @return: A new list with the matching resources.
        @raise TypeError: When pattern is none of the supported kinds.
        """
        if isinstance(pattern, str):
            def is_match(url):
                return pattern == url
        elif isinstance(pattern, _pattern_type):
            def is_match(url):
                return pattern.match(url)
        elif callable(pattern):
            is_match = pattern
        else:
            raise TypeError('pattern must be one of str,re.compile,callable')

        # A list is built explicitly: on Python 3 ``filter`` returns an
        # iterator that cannot be sliced.
        return [resource for resource in self.http_resources
                if is_match(resource.request_url)]

    def save(self, path):
        """Save current page content to the path.

        @param path: The path to save.
        """
        # Binary mode: the content is written as utf-8 encoded bytes.
        with open(path, 'wb') as f:
            f.write(self.content().encode('utf-8'))

    def global_exists(self, global_name):
        """Checks if javascript global exists.

        @param global_name: The name of the global.
        """
        return self.evaluate('!(typeof %s === "undefined");' % global_name)

    def load_cookies(self, cookie_storage, keep_old=False):
        """load from cookielib's CookieJar or Set-Cookie3 format text file.

        @param cookie_storage: file location string on disk or CookieJar instance.
        @param keep_old: Don't reset, keep cookies not overridden.
        @raise ValueError: When cookie_storage has an unsupported type.
        """

        def toQtCookieJar(PyCookieJar, QtCookieJar):
            # QNetworkCookieJar exposes its content through allCookies().
            allCookies = QtCookieJar.allCookies() if keep_old else []
            for pc in PyCookieJar:
                allCookies.append(toQtCookie(pc))
            QtCookieJar.setAllCookies(allCookies)

        def toQtCookie(PyCookie):
            qc = QNetworkCookie(PyCookie.name, PyCookie.value)
            qc.setSecure(PyCookie.secure)
            if PyCookie.path_specified:
                qc.setPath(PyCookie.path)
            if PyCookie.domain != "":
                qc.setDomain(PyCookie.domain)
            # Session cookies carry ``expires=None``; skip those as well as 0.
            if PyCookie.expires:
                t = QDateTime()
                t.setTime_t(PyCookie.expires)
                qc.setExpirationDate(t)
            # not yet handled(maybe less useful):
            #   py cookie.rest / QNetworkCookie.setHttpOnly()
            return qc

        if isinstance(cookie_storage, str):
            cj = LWPCookieJar(cookie_storage)
            cj.load()
            toQtCookieJar(cj, self.cookie_jar)
        elif cookie_storage.__class__.__name__.endswith('CookieJar'):
            toQtCookieJar(cookie_storage, self.cookie_jar)
        else:
            raise ValueError('unsupported cookie_storage type.')

    def save_cookies(self, cookie_storage):
        """Save to cookielib's CookieJar or Set-Cookie3 format text file.

        @param cookie_storage: file location string or CookieJar instance.
        @raise ValueError: When cookie_storage has an unsupported type.
        """

        def toPyCookieJar(QtCookieJar, PyCookieJar):
            for c in QtCookieJar.allCookies():
                PyCookieJar.set_cookie(toPyCookie(c))

        def toPyCookie(QtCookie):
            port = None
            port_specified = False
            secure = QtCookie.isSecure()
            name = str(QtCookie.name())
            value = str(QtCookie.value())
            v = str(QtCookie.path())
            path_specified = bool(v != "")
            path = v if path_specified else None
            v = str(QtCookie.domain())
            domain_specified = bool(v != "")
            domain = v
            domain_initial_dot = v.startswith('.') if domain_specified else None
            v = int(QtCookie.expirationDate().toTime_t())
            # Long type boundary on 32bit platfroms; avoid ValueError
            expires = 2147483647 if v > 2147483647 else v
            rest = {}
            discard = False
            return Cookie(0, name, value, port, port_specified, domain,
                          domain_specified, domain_initial_dot, path,
                          path_specified, secure, expires, discard, None,
                          None, rest)

        if isinstance(cookie_storage, str):
            cj = LWPCookieJar(cookie_storage)
            toPyCookieJar(self.cookie_jar, cj)
            cj.save()
        elif cookie_storage.__class__.__name__.endswith('CookieJar'):
            toPyCookieJar(self.cookie_jar, cookie_storage)
        else:
            raise ValueError('unsupported cookie_storage type.')

    def wait_for_confirm(self, confirm=True, callback=None):
        """Statement that tells GRobot how to deal with javascript confirm().

        @param confirm: A boolean that confirms.
        @param callback: A callable that returns a boolean for confirmation.
        @return: The popup messages.
        """
        # The expectation lives on the robot itself; it is presumably reset
        # to None by the page once the confirm() has been answered.
        self._confirm_expected = (confirm, callback)
        self.wait_for(lambda: self._confirm_expected is None)
        return self.popup_messages

    def wait_for_text(self, text, time_for_stop=None):
        """Waits until given text appear on main frame.

        @param text: The text to wait for.
        @param time_for_stop: Optional timeout in seconds.
        @return:
        """
        logger.debug("Wait for text %s" % text)

        self.wait_for(lambda: text in self.content(),
                      "Can\'t find '%s' in current frame" % text,
                      time_for_stop=time_for_stop)

        return self.wait_for_page_loaded()

    def wait_for_xpath(self, expression, time_for_stop=None):
        """Waits until the xpath expression matches the page content.

        @param expression: The xpath expression to wait for.
        @param time_for_stop: Optional timeout in seconds.
        """
        self.wait_for(lambda: XPath(self.content()).execute(expression),
                      "Can't find xpath=%s in current frame" % expression,
                      time_for_stop=time_for_stop)
        return self.wait_for_page_loaded()

    def wait_for_selector(self, selector):
        """Waits until selector match an element on the frame.

        @param selector: The selector to wait for.
        """
        self.wait_for(lambda: self.exists(selector),
                      'Can\'t find element matching "%s"' % selector)

    def wait_for_page_loaded(self, time_for_stop=None):
        """Waits until page is loaded, assumed that a page as been requested.

        @param time_for_stop: Optional timeout in seconds.
        """
        return self.wait_for(lambda: self._loaded,
                             'Unable to load requested page',
                             time_for_stop=time_for_stop)

    def wait_for(self, condition, timeout_message='', time_for_stop=None):
        """Waits until condition is True.

        While a page is loading the default timeout is ``loading_timeout``
        and expiry only stops the page; otherwise the default is
        ``operate_timeout`` and expiry raises.

        @param condition: A callable that returns the condition.
        @param timeout_message: The exception message on timeout.
        @param time_for_stop: Timeout in seconds; -1 means never timeout.
        @raise OperateTimeout: On timeout while no page is loading.
        """
        if self._loaded:
            time_for_stop = time_for_stop or self.operate_timeout
        else:
            time_for_stop = time_for_stop or self.loading_timeout

        started_at = time.time()
        while not condition():
            if time_for_stop != -1 and time.time() > (started_at + time_for_stop):
                if self._loaded:
                    raise OperateTimeout(timeout_message)
                else:
                    # raise LoadingTimeout, timeout_message
                    self.trigger_action('Stop')  #QWebPage::Stop
                    self._loaded = True
                    logger.warning("Page loading timeout.Force to stop the page")
                    break

            gevent.sleep(2)

    def wait_for_alert(self):
        """Waits for main frame alert().

        @return: The alert message.
        """
        self.wait_for(lambda: self._alert is not None,
                      'User has not been alerted.')
        msg, self._alert = self._alert, None
        return msg

    def _release_last_resources(self):
        """Releases last loaded resources.

        @return: The released resources.
        """
        last_resources, self.http_resources = self.http_resources[:], []
        return last_resources

    def _page_loaded(self, success):
        """Slot for loadFinished: inject helper scripts and mark as loaded.

        @param success: The flag passed by the loadFinished signal (unused).
        """
        if self.develop and self.display:
            if self.inspector is None:
                self.inspector = QWebInspector()

            self.inspector.setPage(self.page)
            self.inspector.show()

        scripts = [
            'atoms.js',
            'htmlutils.js',
        ]

        if self.jquery_namespace:
            scripts.append('jquery-1.9.1.min.js', )

        # os.path.join keeps the path relative when dirname() is empty.
        script_dir = os.path.join(os.path.dirname(__file__), 'javascripts')
        for script in scripts:
            self.evaluate_js_file(os.path.join(script_dir, script))

        if self.jquery_namespace:
            self.evaluate("%s=jQuery.noConflict();" % self.jquery_namespace)

        self._loaded = True
        # self.cache.clear()
        logger.debug("Page load finished")

    def _page_load_started(self):
        """Slot for loadStarted: mark the page as loading."""
        logger.debug("Start load page")
        self._loaded = False

    def _unsupported_content(self, reply):
        """Adds an HttpResource object to http_resources with unsupported
        content.

        @param reply: The QNetworkReply object.
        """
        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            self.http_resources.append(HttpResource(reply, self.cache,
                                                    reply.readAll()))

    def _link_clicked(self, href):
        """Control the page link clicked event, forbid opening a new window.

        @param href: The href attribute of a tag.
        """
        self.main_frame.load(href)

    def _request_ended(self, reply):
        """Adds an HttpResource object to http_resources.

        @param reply: The QNetworkReply object.
        """
        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            self.http_resources.append(HttpResource(reply, self.cache))

    def _authenticate(self, mix, authenticator):
        """Called back on basic / proxy http auth.

        @param mix: The QNetworkReply or QNetworkProxy object.
        @param authenticator: The QAuthenticator object.
        """
        # Credentials are offered once, and only when open() received some.
        if self._auth_attempt == 0 and self._auth is not None:
            username, password = self._auth
            authenticator.setUser(username)
            authenticator.setPassword(password)
            self._auth_attempt += 1

    def _on_manager_ssl_errors(self, reply, errors):
        """Ignore the ssl errors when ignore_ssl_errors is set, else log them.

        @param reply: The QNetworkReply object.
        @param errors: The reported errors (unused).
        """
        url = str(reply.url().toString())
        if self.ignore_ssl_errors:
            reply.ignoreSslErrors()
        else:
            logger.warning('SSL certificate error: %s' % url)

    def exit(self):
        """Release the Qt objects of this robot.

        When the last live robot exits, the shared Qt main loop is scheduled
        to be stopped 20 seconds later. Calling exit() again is a no-op.
        """
        # ``with`` releases the lock even when a Qt call raises.
        with GRobot.exit_lock:
            if self._deleted:
                return

            if self.inspector:
                self.inspector.close()
                sip.delete(self.inspector)

            if self.display:
                self.webview.close()
                sip.delete(self.webview)

            if self.page and not sip.isdeleted(self.page):
                sip.delete(self.page)

            GRobot._liveRobot -= 1

            if GRobot._liveRobot == 0 and GRobot._loop is not None:
                GRobot._kill_loop = gevent.spawn_later(20, self.kill_loop)

            self._deleted = True

    def kill_loop(self):
        """Stop the shared Qt main loop and terminate Xvfb when started."""
        if GRobot._loop is not None:
            GRobot._loop.stop()
        GRobot._loop = None
        GRobot._app = None
        if hasattr(self, 'xvfb'):
            GRobot.xvfb.terminate()
class Kit(object):
    """Loads URLs through a KitWebView / KitPage pair.

    Every network reply that carries an HTTP status code is recorded in
    ``resource_list``; :meth:`request` picks the one matching the final
    main-frame URL and converts it into a ``Response``.

    :param gui: When true, the web view is shown on screen.
    """

    # QApplication shared by every Kit instance in the process.
    _app = None

    def __init__(self, gui=False):
        if not Kit._app:
            # Reuse an application created elsewhere in the process; Qt
            # allows only one QApplication instance at a time.
            Kit._app = QApplication.instance() or QApplication([])

        # Defined before the manager is wired up so that
        # network_reply_handler never finds the attribute missing.
        self.resource_list = []

        manager = KitNetworkAccessManager()
        manager.finished.connect(self.network_reply_handler)

        self.cookie_jar = QNetworkCookieJar()
        manager.setCookieJar(self.cookie_jar)

        self.page = KitPage()
        self.page.setNetworkAccessManager(manager)

        self.view = KitWebView()
        self.view.setPage(self.page)
        self.view.setApplication(Kit._app)

        if gui:
            self.view.show()

    def get_cookies(self):
        """Returns the cookie jar content as a ``{name: value}`` dict.

        Keys and values are whatever ``.data()`` yields for the cookie
        name and value objects.
        """
        return dict(
            (cookie.name().data(), cookie.value().data())
            for cookie in self.cookie_jar.allCookies()
        )

    def request(self, url, user_agent='Mozilla', cookies=None, timeout=15,
                method='get', data=None, headers=None):
        """Loads ``url`` in the web view and returns the built response.

        :param url: The address to load.
        :param user_agent: Value stored on the page as ``user_agent``.
        :param cookies: Optional dict of cookies; they replace the whole
            content of the cookie jar.
        :param timeout: Maximum load duration in seconds (int or float).
        :param method: HTTP method name, matched case-insensitively.
        :param data: Optional request body; an empty QByteArray when None.
        :param headers: Optional dict of raw request headers. The dict is
            copied, the caller's object is never modified.
        :return: The object produced by :meth:`build_response`.
        :raises KitError: On timeout, or when no recorded resource matches
            the final main-frame URL.
        """
        # Work on private copies: the POST branch below writes into
        # ``headers`` and must not leak into the caller's dict.
        cookies = dict(cookies or {})
        headers = dict(headers or {})

        url_info = urlsplit(url)

        self.resource_list = []
        loop = QEventLoop()
        self.view.loadFinished.connect(loop.quit)

        # Timeout: a single-shot timer that quits the same event loop.
        timer = QTimer()
        timer.setSingleShot(True)
        timer.timeout.connect(loop.quit)
        # QTimer.start() takes integer milliseconds.
        timer.start(int(timeout * 1000))

        # User-Agent
        self.page.user_agent = user_agent

        # Cookies: all of them are bound to the dotted host of ``url``
        # (port stripped).
        domain = ('.' + url_info.netloc).split(':')[0]
        cookie_obj_list = []
        for name, value in cookies.items():
            cookie_obj = QNetworkCookie(name, value)
            cookie_obj.setDomain(domain)
            cookie_obj_list.append(cookie_obj)
        self.cookie_jar.setAllCookies(cookie_obj_list)

        # Method
        method_obj = getattr(QNetworkAccessManager,
                             '%sOperation' % method.capitalize())

        # Ensure that Content-Type is correct if method is post. The
        # comparison is case-insensitive because the operation lookup
        # above accepts 'POST' as well as 'post'.
        if method.lower() == 'post':
            headers['Content-Type'] = 'application/x-www-form-urlencoded'

        # Post data
        if data is None:
            data = QByteArray()

        # Request object
        request_obj = QNetworkRequest(QUrl(url))
        for name, value in headers.items():
            request_obj.setRawHeader(name, value)

        # Make a request and block until loadFinished or the timer fires.
        self.view.load(request_obj, method_obj, data)
        loop.exec_()

        # A still-running timer means loadFinished quit the loop first.
        finished_in_time = timer.isActive()
        timer.stop()
        if not finished_in_time:
            raise KitError('Timeout while loading %s' % url)

        page_url = str(self.page.mainFrame().url().toString()).rstrip('/')
        for res in self.resource_list:
            # Trailing slashes are ignored on both sides.
            if page_url == res.url.rstrip('/'):
                return self.build_response(res)
        raise KitError('Request was successfull but it is not possible '
                       'to associate the request to one of received responses')

    def build_response(self, resource):
        """Builds a ``Response`` from a recorded resource.

        ``runtime_body`` is the current main-frame HTML encoded as UTF-8,
        ``body`` is the data stored on the resource's reply.

        :param resource: The recorded resource of the main document.
        :return: The populated ``Response`` object.
        """
        response = Response()
        response.head = ''
        response.code = resource.status_code

        runtime_body = self.page.mainFrame().toHtml()
        body = resource.reply.data
        url = resource.reply.url().toString()
        headers = resource.headers
        cookies = self.get_cookies()

        # py3 hack
        if PY3K:
            if isinstance(body, QByteArray):
                body = body.data()
            headers = decode_dict(headers)
            cookies = decode_dict(cookies)
        else:
            runtime_body = unicode(runtime_body)
            body = str(body)
            url = str(url)

        response.runtime_body = runtime_body.encode('utf-8')
        response.body = body
        response.url = url
        response.parse(charset='utf-8')
        response.headers = headers
        response.cookies = cookies

        return response

    def __del__(self):
        # ``view`` is missing when __init__ failed part-way through.
        view = getattr(self, 'view', None)
        if view is not None:
            view.setPage(None)

    def network_reply_handler(self, reply):
        """Records every finished reply that carries an HTTP status code.

        :param reply: The QNetworkReply object.
        """
        status_code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
        if status_code:
            if not isinstance(status_code, int):
                # Non-int attribute value: convert it through toInt().
                status_code = status_code.toInt()[0]
            logger.debug('Resource loaded: %s [%d]',
                         reply.url().toString(), status_code)
            self.resource_list.append(Resource(reply))
class Ghost(object):
    """Ghost manages a QWebPage.

    :param user_agent: The default User-Agent header.
    :param wait_timeout: Maximum step duration in second.
    :param wait_callback: An optional callable that is periodically
        executed until Ghost stops waiting.
    :param log_level: The optional logging level.
    :param display: A boolean that tells ghost to displays UI.
    :param viewport_size: A tuple that sets initial viewport size.
    :param cache_dir: Directory handed to the network disk cache.
    """
    # Class-level state; the confirm / prompt context managers and
    # set_field_value() write these on the class itself.
    _alert = None
    _confirm_expected = None
    _prompt_expected = None
    _upload_file = None
    _app = None

    # <input> types whose value is assigned directly. The empty string
    # covers an <input> without a type attribute, which HTML treats as a
    # text field.
    _TEXT_INPUT_TYPES = (
        "", "color", "date", "datetime", "datetime-local", "email",
        "hidden", "month", "number", "password", "range", "search",
        "tel", "text", "time", "url", "week",
    )

    def __init__(self, user_agent=default_user_agent, wait_timeout=8,
                 wait_callback=None, log_level=logging.WARNING,
                 display=False, viewport_size=(800, 600),
                 cache_dir='/tmp/ghost.py'):
        self.http_resources = []

        self.user_agent = user_agent
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback

        self.loaded = True

        if not sys.platform.startswith('win') \
                and 'DISPLAY' not in os.environ \
                and not hasattr(Ghost, 'xvfb'):
            try:
                Ghost.xvfb = subprocess.Popen(['Xvfb', ':99'])
            except OSError:
                raise Exception('Xvfb is required to a ghost run oustside '
                                'an X instance')
            # Exported only once Xvfb really started, so a failed launch
            # does not leave a dangling DISPLAY behind.
            os.environ['DISPLAY'] = ':99'

        self.display = display
        # Always defined so that exit() and hide() can test it safely.
        self.webview = None

        if not Ghost._app:
            Ghost._app = QApplication.instance() or QApplication(['ghost'])

        self.page = GhostWebPage(Ghost._app)
        QtWebKit.QWebSettings.setMaximumPagesInCache(0)
        QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)

        self.page.setForwardUnsupportedContent(True)

        self.set_viewport_size(*viewport_size)

        # Page signals
        self.page.loadFinished.connect(self._page_loaded)
        self.page.loadStarted.connect(self._page_load_started)
        self.page.unsupportedContent.connect(self._unsupported_content)

        self.manager = self.page.networkAccessManager()
        self.manager.finished.connect(self._request_ended)
        # Cache
        self.cache = QNetworkDiskCache()
        self.cache.setCacheDirectory(cache_dir)
        self.manager.setCache(self.cache)
        # Cookie jar
        self.cookie_jar = QNetworkCookieJar()
        self.manager.setCookieJar(self.cookie_jar)
        # User Agent
        self.page.setUserAgent(self.user_agent)

        # The same callback serves both server and proxy authentication.
        self.manager.authenticationRequired.connect(self._authenticate)
        self.manager.proxyAuthenticationRequired.connect(self._authenticate)

        self.main_frame = self.page.mainFrame()

        logger.setLevel(log_level)

        if self.display:
            self.webview = QtWebKit.QWebView()
            self.webview.setPage(self.page)
            self.webview.show()

    def __del__(self):
        self.exit()

    def capture(self, region=None, selector=None,
                format=QImage.Format_ARGB32_Premultiplied):
        """Returns snapshot as QImage.

        :param region: An optional tuple containing region as pixel
            coordinates (x1, y1, x2, y2).
        :param selector: A selector targeted the element to crop on; only
            used when no region is given.
        :param format: The output image format.
        """
        if region is None and selector is not None:
            region = self.region_for_selector(selector)

        if region:
            x1, y1, x2, y2 = region
            # Render far enough to include the bottom-right corner,
            # the crop happens afterwards.
            canvas_size = QSize(x2, y2)
        else:
            canvas_size = self.page.viewportSize()

        image = QImage(canvas_size, format)
        painter = QPainter(image)
        try:
            self.main_frame.render(painter)
        finally:
            painter.end()

        if region:
            image = image.copy(x1, y1, x2 - x1, y2 - y1)
        return image

    def capture_to(self, path, region=None, selector=None,
                   format=QImage.Format_ARGB32_Premultiplied):
        """Saves snapshot as image.

        :param path: The destination path.
        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
        """
        self.capture(region=region, format=format,
                     selector=selector).save(path)

    @client_utils_required
    @can_load_page
    def click(self, selector):
        """Click the targeted element.

        :param selector: A CSS3 selector to targeted element.
        """
        if not self.exists(selector):
            raise Exception("Can't find element to click")
        # json.dumps yields a correctly quoted JavaScript string literal,
        # so selectors containing double quotes no longer break the call.
        return self.evaluate('GhostUtils.click(%s);' % json.dumps(selector))

    class confirm:
        """Statement that tells Ghost how to deal with javascript confirm().

        :param confirm: A boolean that confirm.
        :param callback: A callable that returns a boolean for confirmation.
        """
        def __init__(self, confirm=True, callback=None):
            self.confirm = confirm
            self.callback = callback

        def __enter__(self):
            Ghost._confirm_expected = (self.confirm, self.callback)

        def __exit__(self, type, value, traceback):
            Ghost._confirm_expected = None

    @property
    def content(self):
        """Returns current frame HTML as a string."""
        return unicode(self.main_frame.toHtml())

    @property
    def cookies(self):
        """Returns all cookies."""
        return self.cookie_jar.allCookies()

    def delete_cookies(self):
        """Deletes all cookies."""
        self.cookie_jar.setAllCookies([])

    @can_load_page
    def evaluate(self, script):
        """Evaluates script in page frame.

        :param script: The script to evaluate.
        :return: Tuple of (script result, resources released so far).
        """
        return (self.main_frame.evaluateJavaScript("%s" % script),
                self._release_last_resources())

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.
        Raises native IOException in case of invalid file.

        :param path: The path of the file.
        :param encoding: The file's encoding.
        """
        # Close the handle deterministically instead of leaking it.
        with codecs.open(path, encoding=encoding) as script_file:
            source = script_file.read()
        self.evaluate(source)

    def exists(self, selector):
        """Checks if element exists for given selector.

        :param selector: The element selector.
        """
        return not self.main_frame.findFirstElement(selector).isNull()

    def exit(self):
        """Exits application and relateds.

        Safe to call more than once: __del__ calls it again after an
        explicit exit(), and it may run on a partly built instance when
        __init__ raised.
        """
        if getattr(self, '_exited', False):
            return
        self._exited = True

        webview = getattr(self, 'webview', None)
        if getattr(self, 'display', False) and webview is not None:
            webview.close()
        if Ghost._app is not None:
            Ghost._app.quit()
        for attr in ('manager', 'page', 'main_frame'):
            if attr in self.__dict__:
                delattr(self, attr)
        # NOTE(review): xvfb is stored on the class, so this stops the
        # Xvfb process shared by every Ghost instance.
        if hasattr(self, 'xvfb'):
            self.xvfb.terminate()

    @can_load_page
    def fill(self, selector, values):
        """Fills a form with provided values.

        :param selector: A CSS selector to the target form to fill.
        :param values: A dict containing the values, keyed by field name.
        :return: Tuple of (True, resources loaded while filling).
        """
        if not self.exists(selector):
            raise Exception("Can't find form")
        resources = []
        for field, field_value in values.items():
            _, loaded = self.set_field_value(
                "%s [name=%s]" % (selector, field), field_value)
            resources.extend(loaded)
        return True, resources

    @client_utils_required
    @can_load_page
    def fire_on(self, selector, method):
        """Call method on element matching given selector.

        :param selector: A CSS selector to the target element.
        :param method: The name of the method to fire.
        :param expect_loading: Specifies if a page loading is expected
            (not a parameter of this function; presumably consumed by the
            can_load_page decorator -- TODO confirm).
        """
        return self.evaluate('GhostUtils.fireOn(%s, %s);'
                             % (json.dumps(selector), json.dumps(method)))

    def global_exists(self, global_name):
        """Checks if javascript global exists.

        :param global_name: The name of the global.
        """
        return self.evaluate('!(typeof %s === "undefined");'
                             % global_name)[0]

    def hide(self):
        """Close the webview."""
        try:
            self.webview.close()
        except Exception:
            # Narrower than a bare except: KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            raise Exception("no webview to close")

    def open(self, address, method='get', headers=None, auth=None):
        """Opens a web page.

        :param address: The resource URL.
        :param method: The Http method.
        :param headers: An optional dict of extra request headers.
        :param auth: An optional tuple of HTTP auth (username, password).
        :return: Page resource, All loaded resources.
        """
        body = QByteArray()
        try:
            method = getattr(QNetworkAccessManager,
                             "%sOperation" % method.capitalize())
        except AttributeError:
            raise Exception("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        # NOTE(review): the result of this call is discarded and nothing
        # is set on the request; kept as in the original -- confirm the
        # intended cache policy before changing it.
        request.CacheLoadControl(0)
        for header, header_value in (headers or {}).items():
            request.setRawHeader(header, header_value)
        self._auth = auth
        self._auth_attempt = 0  # Avoids recursion
        self.main_frame.load(request, method, body)
        self.loaded = False
        return self.wait_for_page_loaded()

    class prompt:
        """Statement that tells Ghost how to deal with javascript prompt().

        :param value: A string value to fill in prompt.
        :param callback: A callable that returns the value to fill in.
        """
        def __init__(self, value='', callback=None):
            self.value = value
            self.callback = callback

        def __enter__(self):
            Ghost._prompt_expected = (self.value, self.callback)

        def __exit__(self, type, value, traceback):
            Ghost._prompt_expected = None

    @client_utils_required
    def region_for_selector(self, selector):
        """Returns frame region for given selector as tuple.

        :param selector: The targeted element.
        :return: (left, top, right, bottom) of the element geometry.
        """
        geo = self.main_frame.findFirstElement(selector).geometry()
        try:
            region = (geo.left(), geo.top(), geo.right(), geo.bottom())
        except Exception:
            raise Exception("can't get region for selector '%s'" % selector)
        return region

    @can_load_page
    @client_utils_required
    def set_field_value(self, selector, value, blur=True):
        """Sets the value of the field matched by given selector.

        :param selector: A CSS selector that target the field.
        :param value: The value to fill in.
        :param blur: An optional boolean that force blur when filled in.
        :return: Tuple of (script result, loaded resources).
        """
        # Quoted once; reused by every script built below.
        js_selector = json.dumps(selector)
        js_value = json.dumps(value)

        def _set_text_value():
            return self.evaluate('document.querySelector(%s).value=%s;'
                                 % (js_selector, js_value))

        res, resources = None, []

        element = self.main_frame.findFirstElement(selector)
        if element.isNull():
            raise Exception('can\'t find element for %s"' % selector)
        self.fire_on(selector, 'focus')

        tag_name = element.tagName()
        if tag_name in ["TEXTAREA", "SELECT"]:
            res, resources = _set_text_value()
        elif tag_name == "INPUT":
            field_type = element.attribute('type')
            if field_type in self._TEXT_INPUT_TYPES:
                res, resources = _set_text_value()
            elif field_type == "checkbox":
                res, resources = self.evaluate(
                    'GhostUtils.setCheckboxValue(%s, %s);'
                    % (js_selector, js_value))
            elif field_type == "radio":
                res, resources = self.evaluate(
                    'GhostUtils.setRadioValue(%s, %s);'
                    % (js_selector, js_value))
            elif field_type == "file":
                Ghost._upload_file = value
                try:
                    res, resources = self.click(selector)
                finally:
                    # Reset even when the click fails, otherwise the path
                    # would stay set for a later, unrelated upload.
                    Ghost._upload_file = None
        else:
            raise Exception('unsuported field tag')
        if blur:
            self.fire_on(selector, 'blur')
        return res, resources

    def set_viewport_size(self, width, height):
        """Sets the page viewport size.

        :param width: An integer that sets width pixel count.
        :param height: An integer that sets height pixel count.
        """
        self.page.setViewportSize(QSize(width, height))

    def show(self):
        """Show current page inside a QWebView."""
        self.webview = QtWebKit.QWebView()
        self.webview.setPage(self.page)
        self.webview.show()

    def wait_for(self, condition, timeout_message):
        """Waits until condition is True.

        Between two checks the Qt events are processed and the optional
        wait_callback is invoked.

        :param condition: A callable that returns the condition.
        :param timeout_message: The exception message on timeout.
        """
        deadline = time.time() + self.wait_timeout
        while not condition():
            if time.time() > deadline:
                raise Exception(timeout_message)
            time.sleep(0.01)
            Ghost._app.processEvents()
            if self.wait_callback is not None:
                self.wait_callback()

    def wait_for_alert(self):
        """Waits for main frame alert().

        :return: Tuple of (alert message, loaded resources).
        """
        self.wait_for(lambda: Ghost._alert is not None,
                      'User has not been alerted.')
        msg = Ghost._alert
        Ghost._alert = None
        return msg, self._release_last_resources()

    def wait_for_page_loaded(self):
        """Waits until page is loaded, assumed that a page as been
        requested.

        :return: Tuple of (page resource or None, loaded resources).
        """
        self.wait_for(lambda: self.loaded, 'Unable to load requested page')
        resources = self._release_last_resources()
        page = None
        url = self.main_frame.url().toString()
        for resource in resources:
            # No break: the last resource matching the URL wins.
            if url == resource.url:
                page = resource
        return page, resources

    def wait_for_selector(self, selector):
        """Waits until selector match an element on the frame.

        :param selector: The selector to wait for.
        """
        self.wait_for(lambda: self.exists(selector),
                      'Can\'t find element matching "%s"' % selector)
        return True, self._release_last_resources()

    def wait_for_text(self, text):
        """Waits until given text appear on main frame.

        :param text: The text to wait for.
        """
        self.wait_for(lambda: text in self.content,
                      'Can\'t find "%s" in current frame' % text)
        return True, self._release_last_resources()

    def _authenticate(self, mix, authenticator):
        """Called back on basic / proxy http auth.

        :param mix: The QNetworkReply or QNetworkProxy object.
        :param authenticator: The QAuthenticator object.
        """
        # Credentials are offered once per open(); without any there is
        # nothing to unpack.
        if self._auth_attempt == 0 and self._auth is not None:
            username, password = self._auth
            authenticator.setUser(username)
            authenticator.setPassword(password)
            self._auth_attempt += 1

    def _page_loaded(self):
        """Called back when page is loaded."""
        self.loaded = True
        self.cache.clear()

    def _page_load_started(self):
        """Called back when page load started."""
        self.loaded = False

    def _release_last_resources(self):
        """Releases last loaded resources.

        :return: The released resources.
        """
        last_resources = self.http_resources
        self.http_resources = []
        return last_resources

    def _request_ended(self, reply):
        """Adds an HttpResource object to http_resources.

        Replies without an HTTP status code are ignored.

        :param reply: The QNetworkReply object.
        """
        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            self.http_resources.append(HttpResource(reply, self.cache))

    def _unsupported_content(self, reply):
        """Adds an HttpResource object to http_resources with unsupported
        content.

        :param reply: The QNetworkReply object.
        """
        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            self.http_resources.append(
                HttpResource(reply, self.cache, reply.readAll()))
class Ghost(object): """Ghost manages a QWebPage. :param user_agent: The default User-Agent header. :param wait_timeout: Maximum step duration in second. :param wait_callback: An optional callable that is periodically executed until Ghost stops waiting. :param log_level: The optional logging level. :param display: A boolean that tells ghost to displays UI. :param viewport_size: A tuple that sets initial viewport size. :param ignore_ssl_errors: A boolean that forces ignore ssl errors. :param cache_dir: A directory path where to store cache datas. :param plugins_enabled: Enable plugins (like Flash). :param java_enabled: Enable Java JRE. :param plugin_path: Array with paths to plugin directories (default ['/usr/lib/mozilla/plugins']) :param download_images: Indicate if the browser should download images """ _alert = None _confirm_expected = None _prompt_expected = None _upload_file = None _app = None def __init__(self, user_agent=default_user_agent, wait_timeout=8, wait_callback=None, log_level=logging.WARNING, display=False, viewport_size=(800, 600), ignore_ssl_errors=True, cache_dir=os.path.join(tempfile.gettempdir(), "ghost.py"), plugins_enabled=False, java_enabled=False, plugin_path=[ '/usr/lib/mozilla/plugins', ], download_images=True, qt_debug=False): self.http_resources = [] self.user_agent = user_agent self.wait_timeout = wait_timeout self.wait_callback = wait_callback self.ignore_ssl_errors = ignore_ssl_errors self.loaded = True if not sys.platform.startswith('win') and not 'DISPLAY' in os.environ\ and not hasattr(Ghost, 'xvfb'): try: os.environ['DISPLAY'] = ':99' Ghost.xvfb = subprocess.Popen(['Xvfb', ':99']) except OSError: raise Error('Xvfb is required to a ghost run outside ' + 'an X instance') self.display = display if not Ghost._app: Ghost._app = QApplication.instance() or QApplication(['ghost']) qInstallMsgHandler(QTMessageProxy(qt_debug)) if plugin_path: for p in plugin_path: Ghost._app.addLibraryPath(p) self.popup_messages = [] self.page = 
GhostWebPage(Ghost._app, self) QtWebKit.QWebSettings.setMaximumPagesInCache(0) QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0) QtWebKit.QWebSettings.globalSettings().setAttribute( QtWebKit.QWebSettings.LocalStorageEnabled, True) self.page.setForwardUnsupportedContent(True) self.page.settings().setAttribute(QtWebKit.QWebSettings.AutoLoadImages, download_images) self.page.settings().setAttribute(QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled) self.page.settings().setAttribute(QtWebKit.QWebSettings.JavaEnabled, java_enabled) self.set_viewport_size(*viewport_size) # Page signals self.page.loadFinished.connect(self._page_loaded) self.page.loadStarted.connect(self._page_load_started) self.page.unsupportedContent.connect(self._unsupported_content) self.manager = self.page.networkAccessManager() self.manager.finished.connect(self._request_ended) self.manager.sslErrors.connect(self._on_manager_ssl_errors) # Cache self.cache = QNetworkDiskCache() self.cache.setCacheDirectory(cache_dir) self.manager.setCache(self.cache) # Cookie jar self.cookie_jar = QNetworkCookieJar() self.manager.setCookieJar(self.cookie_jar) # User Agent self.page.setUserAgent(self.user_agent) self.page.networkAccessManager().authenticationRequired\ .connect(self._authenticate) self.page.networkAccessManager().proxyAuthenticationRequired\ .connect(self._authenticate) self.main_frame = self.page.mainFrame() logger.setLevel(log_level) if self.display: class MyQWebView(QtWebKit.QWebView): def sizeHint(self): return QSize(*viewport_size) self.webview = MyQWebView() if plugins_enabled: self.webview.settings().setAttribute( QtWebKit.QWebSettings.PluginsEnabled, True) if java_enabled: self.webview.settings().setAttribute( QtWebKit.QWebSettings.JavaEnabled, True) self.webview.setPage(self.page) self.webview.show() else: self.webview = None def __del__(self): self.exit() def capture(self, region=None, selector=None, format=QImage.Format_ARGB32_Premultiplied): """Returns snapshot as QImage. 
:param region: An optional tuple containing region as pixel coodinates. :param selector: A selector targeted the element to crop on. :param format: The output image format. """ if region is None and selector is not None: region = self.region_for_selector(selector) if region: x1, y1, x2, y2 = region w, h = (x2 - x1), (y2 - y1) image = QImage(QSize(x2, y2), format) painter = QPainter(image) self.main_frame.render(painter) painter.end() image = image.copy(x1, y1, w, h) else: self.main_frame.setScrollBarPolicy(QtCore.Qt.Vertical, QtCore.Qt.ScrollBarAlwaysOff) self.main_frame.setScrollBarPolicy(QtCore.Qt.Horizontal, QtCore.Qt.ScrollBarAlwaysOff) self.page.setViewportSize(self.main_frame.contentsSize()) image = QImage(self.page.viewportSize(), format) painter = QPainter(image) self.main_frame.render(painter) painter.end() return image def capture_to(self, path, region=None, selector=None, format=QImage.Format_ARGB32_Premultiplied): """Saves snapshot as image. :param path: The destination path. :param region: An optional tuple containing region as pixel coodinates. :param selector: A selector targeted the element to crop on. :param format: The output image format. """ self.capture(region=region, format=format, selector=selector).save(path) def print_to_pdf( self, path, paper_size=(8.5, 11.0), paper_margins=(0, 0, 0, 0), paper_units=QPrinter.Inch, zoom_factor=1.0, ): """Saves page as a pdf file. See qt4 QPrinter documentation for more detailed explanations of options. :param path: The destination path. :param paper_size: A 2-tuple indicating size of page to print to. :param paper_margins: A 4-tuple indicating size of each margin. :param paper_units: Units for pager_size, pager_margins. :param zoom_factor: Scale the output content. 
""" assert len(paper_size) == 2 assert len(paper_margins) == 4 printer = QPrinter(mode=QPrinter.ScreenResolution) printer.setOutputFormat(QPrinter.PdfFormat) printer.setPaperSize(QtCore.QSizeF(*paper_size), paper_units) printer.setPageMargins(*(paper_margins + (paper_units, ))) printer.setFullPage(True) printer.setOutputFileName(path) if self.webview is None: self.webview = QtWebKit.QWebView() self.webview.setPage(self.page) self.webview.setZoomFactor(zoom_factor) self.webview.print_(printer) @can_load_page def click(self, selector): """Click the targeted element. :param selector: A CSS3 selector to targeted element. """ if not self.exists(selector): raise Error("Can't find element to click") return self.evaluate(""" var element = document.querySelector("%s"); var evt = document.createEvent("MouseEvents"); evt.initMouseEvent("click", true, true, window, 1, 1, 1, 1, 1, false, false, false, false, 0, element); element.dispatchEvent(evt) """ % selector) class confirm: """Statement that tells Ghost how to deal with javascript confirm(). :param confirm: A boolean to set confirmation. :param callable: A callable that returns a boolean for confirmation. """ def __init__(self, confirm=True, callback=None): self.confirm = confirm self.callback = callback def __enter__(self): Ghost._confirm_expected = (self.confirm, self.callback) def __exit__(self, type, value, traceback): Ghost._confirm_expected = None @property def content(self, to_unicode=True): """Returns current frame HTML as a string. 
:param to_unicode: Whether to convert html to unicode or not """ if to_unicode: return unicode(self.main_frame.toHtml()) else: return self.main_frame.toHtml() @property def cookies(self): """Returns all cookies.""" return self.cookie_jar.allCookies() def delete_cookies(self): """Deletes all cookies.""" self.cookie_jar.setAllCookies([]) def clear_alert_message(self): """Clears the alert message""" self._alert = None @can_load_page def evaluate(self, script): """Evaluates script in page frame. :param script: The script to evaluate. """ return (self.main_frame.evaluateJavaScript("%s" % script), self._release_last_resources()) def evaluate_js_file(self, path, encoding='utf-8'): """Evaluates javascript file at given path in current frame. Raises native IOException in case of invalid file. :param path: The path of the file. :param encoding: The file's encoding. """ self.evaluate(codecs.open(path, encoding=encoding).read()) def exists(self, selector): """Checks if element exists for given selector. :param string: The element selector. """ return not self.main_frame.findFirstElement(selector).isNull() def exit(self): """Exits application and related.""" if self.display: self.webview.close() Ghost._app.quit() del self.manager del self.page del self.main_frame if hasattr(self, 'xvfb'): self.xvfb.terminate() @can_load_page def fill(self, selector, values): """Fills a form with provided values. :param selector: A CSS selector to the target form to fill. :param values: A dict containing the values. """ if not self.exists(selector): raise Error("Can't find form") resources = [] for field in values: r, res = self.set_field_value("%s [name=%s]" % (selector, field), values[field]) resources.extend(res) return True, resources @can_load_page def fire_on(self, selector, method): """Call method on element matching given selector. :param selector: A CSS selector to the target element. :param method: The name of the method to fire. 
:param expect_loading: Specifies if a page loading is expected. """ return self.evaluate('document.querySelector("%s").%s();' % \ (selector, method)) def global_exists(self, global_name): """Checks if javascript global exists. :param global_name: The name of the global. """ return self.evaluate('!(typeof %s === "undefined");' % global_name)[0] def hide(self): """Close the webview.""" try: self.webview.close() except: raise Error("no webview to close") def load_cookies(self, cookie_storage, keep_old=False): """load from cookielib's CookieJar or Set-Cookie3 format text file. :param cookie_storage: file location string on disk or CookieJar instance. :param keep_old: Don't reset, keep cookies not overridden. """ def toQtCookieJar(PyCookieJar, QtCookieJar): allCookies = QtCookieJar.cookies if keep_old else [] for pc in PyCookieJar: qc = toQtCookie(pc) allCookies.append(qc) QtCookieJar.setAllCookies(allCookies) def toQtCookie(PyCookie): qc = QNetworkCookie(PyCookie.name, PyCookie.value) qc.setSecure(PyCookie.secure) if PyCookie.path_specified: qc.setPath(PyCookie.path) if PyCookie.domain != "": qc.setDomain(PyCookie.domain) if PyCookie.expires != 0: t = QDateTime() t.setTime_t(PyCookie.expires) qc.setExpirationDate(t) # not yet handled(maybe less useful): # py cookie.rest / QNetworkCookie.setHttpOnly() return qc if cookie_storage.__class__.__name__ == 'str': cj = LWPCookieJar(cookie_storage) cj.load() toQtCookieJar(cj, self.cookie_jar) elif cookie_storage.__class__.__name__.endswith('CookieJar'): toQtCookieJar(cookie_storage, self.cookie_jar) else: raise ValueError, 'unsupported cookie_storage type.' def open(self, address, method='get', headers={}, auth=None, body=None, default_popup_response=None): """Opens a web page. :param address: The resource URL. :param method: The Http method. :param headers: An optional dict of extra request hearders. :param auth: An optional tuple of HTTP auth (username, password). :param body: An optional string containing a payload. 
:param default_popup_response: the default response for any confirm/ alert/prompt popup from the Javascript (replaces the need for the with blocks) :return: Page resource, All loaded resources. """ body = body or QByteArray() try: method = getattr(QNetworkAccessManager, "%sOperation" % method.capitalize()) except AttributeError: raise Error("Invalid http method %s" % method) request = QNetworkRequest(QUrl(address)) request.CacheLoadControl(0) for header in headers: request.setRawHeader(header, headers[header]) self._auth = auth self._auth_attempt = 0 # Avoids reccursion self.main_frame.load(request, method, body) self.loaded = False if default_popup_response is not None: Ghost._prompt_expected = (default_popup_response, None) Ghost._confirm_expected = (default_popup_response, None) return self.wait_for_page_loaded() class prompt: """Statement that tells Ghost how to deal with javascript prompt(). :param value: A string value to fill in prompt. :param callback: A callable that returns the value to fill in. """ def __init__(self, value='', callback=None): self.value = value self.callback = callback def __enter__(self): Ghost._prompt_expected = (self.value, self.callback) def __exit__(self, type, value, traceback): Ghost._prompt_expected = None def region_for_selector(self, selector): """Returns frame region for given selector as tuple. :param selector: The targeted element. """ geo = self.main_frame.findFirstElement(selector).geometry() try: region = (geo.left(), geo.top(), geo.right(), geo.bottom()) except: raise Error("can't get region for selector '%s'" % selector) return region def save_cookies(self, cookie_storage): """Save to cookielib's CookieJar or Set-Cookie3 format text file. :param cookie_storage: file location string or CookieJar instance. 
""" def toPyCookieJar(QtCookieJar, PyCookieJar): for c in QtCookieJar.allCookies(): PyCookieJar.set_cookie(toPyCookie(c)) def toPyCookie(QtCookie): port = None port_specified = False secure = QtCookie.isSecure() name = str(QtCookie.name()) value = str(QtCookie.value()) v = str(QtCookie.path()) path_specified = bool(v != "") path = v if path_specified else None v = str(QtCookie.domain()) domain_specified = bool(v != "") domain = v domain_initial_dot = v.startswith( '.') if domain_specified else None v = long(QtCookie.expirationDate().toTime_t()) # Long type boundary on 32bit platfroms; avoid ValueError expires = 2147483647 if v > 2147483647 else v rest = {} discard = False return Cookie(0, name, value, port, port_specified, domain, domain_specified, domain_initial_dot, path, path_specified, secure, expires, discard, None, None, rest) if cookie_storage.__class__.__name__ == 'str': cj = LWPCookieJar(cookie_storage) toPyCookieJar(self.cookie_jar, cj) cj.save() elif cookie_storage.__class__.__name__.endswith('CookieJar'): toPyCookieJar(self.cookie_jar, cookie_storage) else: raise ValueError, 'unsupported cookie_storage type.' @can_load_page def set_field_value(self, selector, value, blur=True): """Sets the value of the field matched by given selector. :param selector: A CSS selector that target the field. :param value: The value to fill in. :param blur: An optional boolean that force blur when filled in. 
""" def _set_checkbox_value(el, value): el.setFocus() if value is True: el.setAttribute('checked', 'checked') else: el.removeAttribute('checked') def _set_checkboxes_value(els, value): for el in els: if el.attribute('value') == value: _set_checkbox_value(el, True) else: _set_checkbox_value(el, False) def _set_radio_value(els, value): for el in els: if el.attribute('value') == value: el.setFocus() el.setAttribute('checked', 'checked') def _set_text_value(el, value): el.setFocus() el.setAttribute('value', value) def _set_select_value(el, value): el.setFocus() self.evaluate( 'document.querySelector("%s").value = "%s";' % (selector.replace('"', '\"'), value.replace('"', '\"'))) def _set_textarea_value(el, value): el.setFocus() el.setPlainText(value) res, ressources = None, [] element = self.main_frame.findFirstElement(selector) if element.isNull(): raise Error('can\'t find element for %s"' % selector) if element.tagName() == "SELECT": _set_select_value(element, value) elif element.tagName() == "TEXTAREA": _set_textarea_value(element, value) elif element.tagName() == "INPUT": if element.attribute('type') in [ "color", "date", "datetime", "datetime-local", "email", "hidden", "month", "number", "password", "range", "search", "tel", "text", "time", "url", "week" ]: _set_text_value(element, value) elif element.attribute('type') == "checkbox": els = self.main_frame.findAllElements(selector) if els.count() > 1: _set_checkboxes_value(els, value) else: _set_checkbox_value(element, value) elif element.attribute('type') == "radio": _set_radio_value(self.main_frame.findAllElements(selector), value) elif element.attribute('type') == "file": Ghost._upload_file = value res, resources = self.click(selector) Ghost._upload_file = None else: raise Error('unsuported field tag') if blur: self.fire_on(selector, 'blur') return res, ressources def set_proxy(self, type, host='localhost', port=8888, user='', password=''): """Set up proxy for FURTHER connections. 
:param type: proxy type to use: \ none/default/socks5/https/http. :param host: proxy server ip or host name. :param port: proxy port. """ _types = { 'default': QNetworkProxy.DefaultProxy, 'none': QNetworkProxy.NoProxy, 'socks5': QNetworkProxy.Socks5Proxy, 'https': QNetworkProxy.HttpProxy, 'http': QNetworkProxy.HttpCachingProxy } if type is None: type = 'none' type = type.lower() if type in ['none', 'default']: self.manager.setProxy(QNetworkProxy(_types[type])) return elif type in _types: proxy = QNetworkProxy(_types[type], hostName=host, port=port, user=user, password=password) self.manager.setProxy(proxy) else: raise ValueError, 'Unsupported proxy type:' + type \ + '\nsupported types are: none/socks5/http/https/default' def set_viewport_size(self, width, height): """Sets the page viewport size. :param width: An integer that sets width pixel count. :param height: An integer that sets height pixel count. """ self.page.setViewportSize(QSize(width, height)) def append_popup_message(self, message): self.popup_messages.append(unicode(message)) def show(self): """Show current page inside a QWebView. """ self.webview = QtWebKit.QWebView() self.webview.setPage(self.page) self.webview.show() def sleep(self, value): started_at = time.time() while True: if time.time() > (started_at + value): break time.sleep(0.01) Ghost._app.processEvents() def wait_for(self, condition, timeout_message): """Waits until condition is True. :param condition: A callable that returns the condition. :param timeout_message: The exception message on timeout. """ started_at = time.time() while not condition(): if time.time() > (started_at + self.wait_timeout): raise TimeoutError(timeout_message) time.sleep(0.01) Ghost._app.processEvents() if self.wait_callback is not None: self.wait_callback() def wait_for_alert(self): """Waits for main frame alert(). 
""" self.wait_for(lambda: Ghost._alert is not None, 'User has not been alerted.') msg = Ghost._alert Ghost._alert = None return msg, self._release_last_resources() def wait_for_page_loaded(self): """Waits until page is loaded, assumed that a page as been requested. """ self.wait_for(lambda: self.loaded, 'Unable to load requested page') resources = self._release_last_resources() page = None url = self.main_frame.url().toString() url_without_hash = url.split("#")[0] for resource in resources: if url == resource.url or url_without_hash == resource.url: page = resource return page, resources def wait_for_selector(self, selector): """Waits until selector match an element on the frame. :param selector: The selector to wait for. """ self.wait_for(lambda: self.exists(selector), 'Can\'t find element matching "%s"' % selector) return True, self._release_last_resources() def wait_for_text(self, text): """Waits until given text appear on main frame. :param text: The text to wait for. """ self.wait_for(lambda: text in self.content, 'Can\'t find "%s" in current frame' % text) return True, self._release_last_resources() def _authenticate(self, mix, authenticator): """Called back on basic / proxy http auth. :param mix: The QNetworkReply or QNetworkProxy object. :param authenticator: The QAuthenticator object. """ if self._auth_attempt == 0: username, password = self._auth authenticator.setUser(username) authenticator.setPassword(password) self._auth_attempt += 1 def _page_loaded(self): """Called back when page is loaded. """ self.loaded = True self.cache.clear() def _page_load_started(self): """Called back when page load started. """ self.loaded = False def _release_last_resources(self): """Releases last loaded resources. :return: The released resources. """ last_resources = self.http_resources self.http_resources = [] return last_resources def _request_ended(self, reply): """Adds an HttpResource object to http_resources. :param reply: The QNetworkReply object. 
""" if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute): Logger.log("[%s] bytesAvailable()= %s" % (str(reply.url()), reply.bytesAvailable()), level="debug") # Some web pages return cache headers that mandates not to cache the # reply, which means we won't find this QNetworkReply in the cache # object. In this case bytesAvailable will return > 0. # Such pages are www.etsy.com # This is a bit of a hack and due to the async nature of QT, might # not work at times. We should move to using some proxied implementation # of QNetworkManager and QNetworkReply in order to get the contents # of the requests properly rather than relying on the cache. if reply.bytesAvailable() > 0: content = reply.peek(reply.bytesAvailable()) else: content = None self.http_resources.append( HttpResource(reply, self.cache, content=content)) def _unsupported_content(self, reply): """Adds an HttpResource object to http_resources with unsupported content. :param reply: The QNetworkReply object. """ self.wait_for(lambda: reply.isFinished(), 'Download timeout.') if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute): self.http_resources.append( HttpResource(reply, self.cache, reply.readAll())) def _on_manager_ssl_errors(self, reply, errors): url = unicode(reply.url().toString()) if self.ignore_ssl_errors: reply.ignoreSslErrors() else: Logger.log('SSL certificate error: %s' % url, level='warning')
class Browser(MouseMixin, PositionMixin, WaitMixin, JavaScriptMixin):
    """Drives one WKit web page/view pair and records its network replies.

    Must be created while ``WKitScope.app`` is set.

    :param gui: When true, the view is shown.
    :param traffic_rules: Passed through to ``WKitNetworkAccessManager``.
    """

    def __init__(self, gui=False, traffic_rules=None):
        if not WKitScope.app:
            raise InternalError('You should use Browser instance'
                                ' inside `with WKitScope():` block')
        self.app = WKitScope.app

        # Per-request state. It is created here, before any signal is
        # connected, so the handlers below never hit a missing attribute
        # when a reply or load event arrives before request() has run.
        self._response = None
        self._page_loaded = False
        self.resource_list = []
        self.content_type_stats = Counter()

        self.manager = WKitNetworkAccessManager(traffic_rules=traffic_rules)
        self.manager.finished.connect(self.handle_finished_network_reply)

        self.cookie_jar = QNetworkCookieJar()
        self.manager.setCookieJar(self.cookie_jar)

        self.page = WKitWebPage()
        self.page.setNetworkAccessManager(self.manager)
        self.page.loadFinished.connect(self.handle_page_load_finished)

        self.view = WKitWebView()
        self.view.setPage(self.page)
        self.view.setApplication(self.app)

        self.gui = gui
        if gui:
            self.view.show()

    def get_cookies(self):
        """Returns all cookies currently held by the cookie jar."""
        return self.cookie_jar.allCookies()

    def get_simple_cookies(self):
        """Returns the jar's cookies as a ``{name: value}`` dict.

        Names and values are decoded as latin. Later cookies overwrite
        earlier ones that share a name.
        """
        return {
            cookie.name().data().decode('latin'):
                cookie.value().data().decode('latin')
            for cookie in self.cookie_jar.allCookies()
        }

    def go(self, url, **kwargs):
        """Shortcut for ``request(url=url, **kwargs)``."""
        return self.request(url=url, **kwargs)

    def request(self, url=None, user_agent=None, cookies=None,
                timeout=DEFAULT_PAGE_LOAD_TIMEOUT, referer=None,
                method='get', data=None, headers=None, proxy=None,
                wait=True):
        """Loads ``url`` in the view.

        :param url: The URL to load.
        :param user_agent: User-Agent to set on the page; defaults to
            ``DEFAULT_USER_AGENT``.
        :param cookies: Optional ``{name: value}`` mapping (see the NOTE in
            the body: the built cookies are not installed at the moment).
        :param timeout: Passed to ``wait_for_page_loaded`` when waiting.
        :param referer: Optional Referer header value.
        :param method: HTTP method name, e.g. ``'get'`` or ``'post'``.
        :param data: Request body; an empty ``QByteArray`` when ``None``.
        :param headers: Optional mapping of raw headers. It is copied, the
            caller's mapping is never modified.
        :param proxy: When truthy, passed to the manager's ``setup_proxy``.
        :param wait: When true, block until the page is loaded.
        :return: The page response when ``wait`` is true, else ``None``.
        """
        # Reset things bound to previous response
        self._response = None
        self.resource_list = []
        self._page_loaded = False
        self.content_type_stats = Counter()

        # Proxy
        if proxy:
            self.manager.setup_proxy(proxy)

        # User-Agent
        if user_agent is None:
            user_agent = DEFAULT_USER_AGENT
        self.page.set_user_agent(user_agent)

        # Cookies
        # NOTE: the cookie objects are built but never installed into the
        # jar; the installing call was already disabled in this code.
        if cookies is None:
            cookies = {}
        cookie_obj_list = []
        for name, value in cookies.items():
            domain = ('.' + urlsplit(url).netloc).split(':')[0]
            cookie_obj = QNetworkCookie(name, value)
            cookie_obj.setDomain(domain)
            cookie_obj_list.append(cookie_obj)

        # HTTP Method
        method_obj = getattr(QNetworkAccessManager,
                             '%sOperation' % method.capitalize())

        # Headers: work on a private copy, created BEFORE anything is
        # written to it (a POST without headers used to fail on None).
        headers = dict(headers) if headers else {}

        # Ensure that Content-Type is correct if method is post. The check
        # is case-insensitive, like the operation lookup above.
        if method.lower() == 'post':
            headers['Content-Type'] = 'application/x-www-form-urlencoded'

        # POST Data
        if data is None:
            data = QByteArray()

        # Build Request object
        req = QNetworkRequest(QUrl(url))

        # Referer
        if referer:
            req.setRawHeader('Referer', referer)

        for name, value in headers.items():
            req.setRawHeader(name, value)

        # Spawn request
        self.view.load(req, method_obj, data)

        if not wait:
            return None
        self.wait_for_page_loaded(timeout=timeout)
        return self.get_page_response()

    def sleep(self, sleep_time):
        """Keeps processing application events for ``sleep_time`` seconds."""
        start = time.time()
        while time.time() < start + sleep_time:
            time.sleep(0.01)
            self.app.processEvents()

    def get_url(self):
        """Returns the main frame URL without hash part or trailing slash."""
        return self.page.mainFrame().url().toString()\
                   .split('#')[0].rstrip('/')

    def get_page_response(self):
        """Returns the recorded response that belongs to the current page.

        The match is made on URL (trailing slashes ignored) and cached
        until the next ``request()``.

        :raises InternalError: If no recorded response has the page's URL.
        """
        if self._response:
            return self._response

        url = self.get_url()
        for res in self.resource_list:
            if url == res.url.rstrip('/'):
                self._response = res
                return res

        # Diagnostics for the failure below go to the logger, not stdout.
        logger_response.debug(
            'Resource list: %s' % [res.url for res in self.resource_list])
        logger_response.debug(
            'Current page URL: %s' % self.page.mainFrame().url().toString())
        raise InternalError('Could not associate any of loaded responses'
                            ' with requested URL: %s' % url)

    def assert_ok_response(self):
        """Raises ``HttpStatusNotSuccess`` unless the page status is 200."""
        if self.get_page_response().status_code != 200:
            raise HttpStatusNotSuccess

    def get_html(self):
        """Returns the main frame content as HTML."""
        return self.page.mainFrame().toHtml()

    def get_doc(self):
        """Returns the main frame's document element."""
        return self.page.mainFrame().documentElement()

    def get_element(self, query):
        """Returns the first element matching ``query``.

        :raises IndexError: If nothing matches.
        """
        elem = self.get_doc().findFirst(query)
        if elem.isNull():
            raise IndexError('Could not find element: %s' % query)
        return elem

    def element_exists(self, query):
        """Tells whether at least one element matches ``query``."""
        try:
            self.get_element(query)
        except IndexError:
            return False
        return True

    def find_elements(self, query):
        """Returns all elements matching ``query``."""
        return self.get_doc().findAll(query)

    def get_base_url(self):
        """Returns the ``<base href>`` value, or the page response URL when
        the document has no such element or its href is empty."""
        try:
            base = self.get_element('base[href]')
        except IndexError:
            return self.get_page_response().url
        return base.attribute('href') or self.get_page_response().url

    def get_random_int_link(self):
        """Returns a random same-host link of the page, or ``None``.

        Only ``a[href]`` targets whose host equals the base URL's host and
        which differ from the page URL are considered; collection stops
        once more than 50 candidates were gathered.
        """
        base_url = self.get_base_url()
        base_host = urlsplit(base_url).hostname

        links = []
        for elem in self.find_elements('a[href]'):
            url = urljoin(base_url, elem.attribute('href'))
            if urlsplit(url).hostname != base_host:
                continue
            if url != self.get_page_response().url:
                links.append(url)
            if len(links) > 50:
                break

        return choice(links) if links else None

    # **************
    # Event Handlers
    # **************

    def handle_page_load_finished(self):
        """Marks the page as loaded (slot of the page's loadFinished)."""
        self._page_loaded = True

        # jQuery injection is switched off: its guard is hard-coded to
        # False, so nothing below runs. The path is kept, under one flag,
        # for whoever re-enables it.
        inject_jquery = False  # was: self.jquery_namespace
        if self.gui and inject_jquery:
            js_dir = os.path.dirname(__file__) + '/js/'
            for script in ('jquery-1.9.1.min.js',):
                self.evaluate_js_file(js_dir + script)
            self.evaluate(u"WKit = jQuery.noConflict();")

    def handle_finished_network_reply(self, reply):
        """Records a finished reply that carries an HTTP status code.

        The reply is appended to ``resource_list`` and its Content-Type
        (parameters stripped) is counted in ``content_type_stats``.

        :param reply: The finished QNetworkReply object.
        """
        status_code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
        if not status_code:
            return

        if not isinstance(status_code, int):
            # Some bindings hand back a variant-like object, not an int.
            status_code = status_code.toInt()[0]
        logger_response.debug('HttpResource [%d]: %s' % (
            status_code, reply.url().toString()))
        self.resource_list.append(HttpResponse.build_from_reply(reply))

        ctype = reply.rawHeader('Content-Type').data()\
                     .decode('latin').split(';')[0]
        self.content_type_stats[ctype] += 1