def parseArgs(app, args):
    """Parse the command line and return the validated options namespace.

    Arguments the parser does not recognise are decoded as UTF-8 and kept on
    ``script_args``.  As side effects a ``MessageHandler`` is installed as
    the Qt message handler and, when a config file is given, its settings
    are copied onto the namespace.  The process exits through ``sys.exit``
    when a referenced file does not exist, the proxy value is malformed or
    no script was supplied.
    """
    parser = argParser()
    options, leftovers = parser.parse_known_args(args)

    # script arguments are handed on as unicode strings
    options.script_args = [unicode(raw, 'utf-8') for raw in leftovers]

    # route Qt messages through our own handler
    handler = MessageHandler(options.verbose)
    qInstallMsgHandler(handler.process)

    # every file named on the command line has to exist
    for path in (options.cookies_file, options.config):
        if path is None or os.path.exists(path):
            continue
        sys.exit("No such file or directory: '%s'" % path)

    if options.config:
        config = Config(app, options.config)
        # copy each configured value onto the attribute it maps to
        for key in config.settings:
            target = config.settings[key]['mapping']
            setattr(options, target, config.property(key))

    # colon separated options are replaced by their split parts
    for value, attr in ((options.proxy, 'proxy'),):
        if not value:
            continue
        parts = value.split(':')
        if len(parts) < 2 or not len(parts[1]):
            parser.print_help()
            sys.exit(1)
        setattr(options, attr, parts)

    if options.proxy is not None and options.proxy_type == 'socks5':
        options.proxy_type = QNetworkProxy.Socks5Proxy

    do_action('ParseArgs', options)

    if options.debug:
        debug(options.debug)

    # the verbose flag may have changed since the handler was created
    if handler.verbose != options.verbose:
        handler.verbose = options.verbose

    script = options.script
    if script is None:
        parser.print_help()
        sys.exit(1)
    if not os.path.exists(script):
        sys.exit("No such file or directory: '%s'" % script)

    return options
示例#2
0
def parseArgs(app, args):
    """Turn the raw argument list into a checked options namespace.

    Unknown arguments end up, converted to unicode, in ``script_args``.
    Installs ``MessageHandler.process`` as the Qt message handler, applies
    the settings of an optional config file and terminates the process with
    ``sys.exit`` if a named file is missing, the proxy cannot be split into
    two non-empty parts, or no script was given.
    """
    arg_parser = argParser()
    parsed, script_args = arg_parser.parse_known_args(args)

    # keep the leftover arguments, converted to unicode in place
    parsed.script_args = script_args
    script_args[:] = [unicode(item, 'utf-8') for item in script_args]

    # register an alternative Message Handler
    msg_handler = MessageHandler(parsed.verbose)
    qInstallMsgHandler(msg_handler.process)

    # stop at the first named file that does not exist
    required_files = (parsed.cookies_file, parsed.config)
    missing = next((name for name in required_files
                    if name is not None and not os.path.exists(name)), None)
    if missing is not None:
        sys.exit("No such file or directory: '%s'" % missing)

    if parsed.config:
        config = Config(app, parsed.config)
        # apply settings
        for name in config.settings:
            mapping = config.settings[name]['mapping']
            setattr(parsed, mapping, config.property(name))

    colon_separated = ((parsed.proxy, 'proxy'), )
    for raw, attribute in colon_separated:
        if not raw:
            continue
        pieces = raw.split(':')
        if len(pieces) >= 2 and len(pieces[1]):
            setattr(parsed, attribute, pieces)
        else:
            arg_parser.print_help()
            sys.exit(1)

    uses_socks5 = parsed.proxy is not None and parsed.proxy_type == 'socks5'
    if uses_socks5:
        parsed.proxy_type = QNetworkProxy.Socks5Proxy

    do_action('ParseArgs', parsed)

    if parsed.debug:
        debug(parsed.debug)

    # re-sync the handler if the verbose flag no longer matches
    if msg_handler.verbose != parsed.verbose:
        msg_handler.verbose = parsed.verbose

    if parsed.script is None:
        arg_parser.print_help()
        sys.exit(1)
    elif not os.path.exists(parsed.script):
        sys.exit("No such file or directory: '%s'" % parsed.script)

    return parsed
示例#3
0
def parseArgs(app, args):
    """Parse the command line and return the validated options namespace.

    Unrecognised arguments are stored unchanged on ``script_args``.  A
    ``MessageHandler`` is installed as the Qt message handler, settings from
    an optional config file are copied onto the namespace, and ``proxy`` /
    ``auth`` values are replaced by their colon-split parts.  The process
    exits via ``sys.exit`` when a named file is missing, a colon separated
    value is malformed or no script was given.
    """
    # Handle all command-line options
    p = argParser()
    arg_data = p.parse_known_args(args)
    args = arg_data[0]
    args.script_args = arg_data[1]

    # Turn the string switches into booleans.  Every option is True unless
    # it is exactly 'no', except load_images which is True only for 'yes'.
    args.disk_cache = False if args.disk_cache == 'no' else True
    args.ignore_ssl_errors = False if args.ignore_ssl_errors == 'no' else True
    args.load_images = True if args.load_images == 'yes' else False
    args.load_plugins = False if args.load_plugins == 'no' else True
    args.local_access_remote = False if args.local_access_remote == 'no' else True

    # register an alternative Message Handler
    messageHandler = MessageHandler(args.verbose)
    qInstallMsgHandler(messageHandler.process)

    # files named on the command line must exist
    file_check = (args.cookies, args.config)
    for file_ in file_check:
        if file_ is not None and not os.path.exists(file_):
            sys.exit("No such file or directory: '%s'" % file_)

    if args.config:
        config = Config(app, args.config)
        # apply settings
        for setting in config.settings:
            setattr(args, config.settings[setting]['mapping'], config.property(setting))

            # special case for verbose arg, which will need to be re-applied
            if setting == 'verbose':
                messageHandler.verbose = args.verbose

    # values that must look like "first:second" with a non-empty second part
    split_check = (
        (args.proxy, 'proxy'),
        (args.auth, 'auth')
    )
    for arg, name in split_check:
        if arg:
            item = arg.split(':')
            if len(item) < 2 or not len(item[1]):
                p.print_help()
                sys.exit(1)
            # the attribute now holds the list of split parts
            setattr(args, name, item)

    # NOTE(review): the hook is triggered without arguments; whether it reads
    # this function's locals some other way is not visible here -- confirm
    # before renaming anything in this function.
    do_action('ParseArgs')

    if args.script is None:
        p.print_help()
        sys.exit(1)

    if not os.path.exists(args.script):
        sys.exit("No such file or directory: '%s'" % args.script)

    return args
示例#4
0
    def __init__(self, name, settings):
        """Create the web view, web page and timers used by the application.

        ``name`` and ``settings`` are stored on the instance.  ``settings``
        is indexed with dotted string keys (``'application.visible'``,
        ``'application.settings.*'``) whose values are passed through
        ``int()``.
        """
        # the Qt application base class is initialised with an empty argv
        super(Application, self).__init__([])

        self.name = name
        self.settings = settings

        self.web_view = QWebView()

        # single-shot timer with a 1000 interval; it is only configured here,
        # not started
        self._exit_timer = QTimer(self)
        self._exit_timer.setSingleShot(True)
        self._exit_timer.setInterval(1000)

        self._expects = []
        self._expects_if_timeout = []
        # single-shot timer that calls _on_expects_timeout when it fires
        self._expects_timer = QTimer(self)
        self._expects_timer.setSingleShot(True)
        self._expects_timer.timeout.connect(self._on_expects_timeout)
        self._frame_data = {}
        self._frame_data_lock = Lock()
        # repeating timer, started immediately with a 3000 interval
        self._frame_timer = QTimer(self)
        self._frame_timer.start(3000)
        self._queue = Queue()
        self._trigger_delay_timer = QTimer(self)
        self._trigger_delay_timer.setSingleShot(True)
        self._visible = int(self.settings['application.visible'])

        self.web_page = WebPage(self.web_view)
        self.web_page.log_event.connect(self.log_event)
        self.web_page.frameCreated.connect(self._on_frame_created)
        # the main frame already exists, so run the frame handler for it by hand
        self._on_frame_created(self.web_page.mainFrame())
        #self.web_page.networkAccessManager().finished.connect(
        #        self._on_http_response)

        self.web_view.setPage(self.web_page)

        # page settings taken from the application settings
        st = self.web_page.settings()
        st.setAttribute(st.AutoLoadImages,
                        int(self.settings['application.settings.load_images']))
        st.setAttribute(
            st.JavaEnabled,
            int(self.settings['application.settings.java_enabled']))
        st.setAttribute(
            st.PluginsEnabled,
            int(self.settings['application.settings.plugins_enabled']))

        self.clear_handlers()

        # redirect qt related messages
        # qInstallMessageHandler is tried first; if that name is not defined
        # the PyQt4-style qInstallMsgHandler is used instead
        try:
            qInstallMessageHandler(self._pyqt5_null_message_handler)
        except NameError:
            qInstallMsgHandler(self._pyqt4_null_message_handler)
示例#5
0
    def __init__(self,
                 user_agent=default_user_agent,
                 wait_timeout=20,
                 wait_callback=None,
                 log_level=logging.WARNING,
                 display=False,
                 viewport_size=(800, 600),
                 cache_dir='/tmp/ghost.py',
                 cache_size=10,
                 plugin_path=[
                     '/usr/lib/mozilla/plugins',
                 ],
                 share_cookies=True,
                 share_cache=True,
                 qt_debug=False):
        """Store the session options and make sure a Qt application exists.

        On non-Windows platforms without a ``DISPLAY`` environment variable
        an ``Xvfb`` process is spawned on ``:99`` (at most once; it is kept
        on ``Ghost.xvfb``) and ``DISPLAY`` is pointed at it.

        :param user_agent: stored on ``self.user_agent``.
        :param wait_timeout: stored on ``self.wait_timeout``.
        :param wait_callback: stored on ``self.wait_callback``.
        :param log_level: stored and applied to the module ``logger``.
        :param display: stored on ``self.display``.
        :param viewport_size: stored on ``self.viewport_size``.
        :param cache_dir: stored on ``self.cache_dir``.
        :param cache_size: stored on ``self.cache_size``.
        :param plugin_path: iterable of paths, each added to the application
            with ``addLibraryPath``.  The default list is never mutated.
        :param share_cookies: stored on ``self.share_cookies``.
        :param share_cache: stored on ``self.share_cache``.
        :param qt_debug: handed to ``QTMessageProxy`` when the Qt message
            handler is installed.
        :raises Exception: if ``Xvfb`` is needed but cannot be started.
        """
        self.user_agent = user_agent
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.viewport_size = viewport_size
        self.log_level = log_level
        self.display = display
        self.share_cookies = share_cookies
        self.share_cache = share_cache
        self.cache_dir = cache_dir
        self.cache_size = cache_size
        self.network_managers = []
        self.current_page = None
        self._pages = []

        if not sys.platform.startswith('win') \
                and 'DISPLAY' not in os.environ \
                and not hasattr(Ghost, 'xvfb'):
            try:
                Ghost.xvfb = subprocess.Popen(['Xvfb', ':99'])
            except OSError:
                raise Exception('Xvfb is required to a ghost run outside ' +
                                'an X instance')
            # Export the display only after Xvfb was spawned, so a failed
            # launch does not leave a DISPLAY pointing at a missing server.
            os.environ['DISPLAY'] = ':99'

        if not Ghost._app:
            # reuse a running QApplication if there is one
            Ghost._app = QApplication.instance() or QApplication(['ghost'])
            qInstallMsgHandler(QTMessageProxy(qt_debug))
        for p in plugin_path:
            Ghost._app.addLibraryPath(p)
        QtWebKit.QWebSettings.setMaximumPagesInCache(0)
        QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)

        logger.setLevel(log_level)
示例#6
0
def main():
    """Run a Phantom session for the current command line.

    Parses ``sys.argv``, installs the Qt message handler, creates and labels
    the ``QApplication``, executes Phantom, runs the event loop and finally
    exits the interpreter with Phantom's return value.
    """
    options = parseArgs(sys.argv[1:])

    # Qt messages go through MessageHandler.process from here on
    handler = MessageHandler(options.verbose)
    qInstallMsgHandler(handler.process)

    application = QApplication(sys.argv)

    # application metadata
    application.setWindowIcon(QIcon(':/resources/pyphantomjs-icon.png'))
    application.setApplicationName('PyPhantomJS')
    application.setOrganizationName('Umaclan Development')
    application.setOrganizationDomain('www.umaclan.com')
    application.setApplicationVersion(version)

    runner = Phantom(options, application)
    runner.execute()
    application.exec_()

    exit_status = runner.returnValue()
    sys.exit(exit_status)
示例#7
0
def main():
    """Entry point: parse arguments, start Qt and hand control to Phantom.

    The interpreter is terminated with ``Phantom.returnValue()`` after the
    Qt event loop has returned.
    """
    cli_args = parseArgs(sys.argv[1:])

    # register an alternative Message Handler
    qt_messages = MessageHandler(cli_args.verbose)
    qInstallMsgHandler(qt_messages.process)

    qt_app = QApplication(sys.argv)
    qt_app.setWindowIcon(QIcon(':/resources/pyphantomjs-icon.png'))

    # remaining application metadata, applied in this order
    metadata = (
        (qt_app.setApplicationName, 'PyPhantomJS'),
        (qt_app.setOrganizationName, 'Umaclan Development'),
        (qt_app.setOrganizationDomain, 'www.umaclan.com'),
        (qt_app.setApplicationVersion, version),
    )
    for apply_value, value in metadata:
        apply_value(value)

    session = Phantom(cli_args, qt_app)
    session.execute()
    qt_app.exec_()
    sys.exit(session.returnValue())
示例#8
0
    def __init__(self, user_agent=default_user_agent, wait_timeout=20,
            wait_callback=None, log_level=logging.WARNING, display=False,
            viewport_size=(800, 600), cache_dir='/tmp/ghost.py', cache_size=10,
            plugin_path=['/usr/lib/mozilla/plugins',],
            share_cookies=True, share_cache=True, qt_debug=False):
        """Store the session options and make sure a Qt application exists.

        On non-Windows platforms without a ``DISPLAY`` environment variable
        an ``Xvfb`` process is spawned on ``:99`` (at most once; it is kept
        on ``Ghost.xvfb``) and ``DISPLAY`` is pointed at it.

        Every option except ``plugin_path`` and ``qt_debug`` is stored on an
        instance attribute of the same name.  Each entry of ``plugin_path``
        is added to the application with ``addLibraryPath`` (the default
        list is never mutated); ``qt_debug`` is handed to ``QTMessageProxy``
        and ``log_level`` is also applied to the module ``logger``.

        :raises Exception: if ``Xvfb`` is needed but cannot be started.
        """
        self.user_agent = user_agent
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.viewport_size = viewport_size
        self.log_level = log_level
        self.display = display
        self.share_cookies = share_cookies
        self.share_cache = share_cache
        self.cache_dir = cache_dir
        self.cache_size = cache_size
        self.network_managers = []
        self.current_page = None
        self._pages = []

        if not sys.platform.startswith('win') \
                and 'DISPLAY' not in os.environ \
                and not hasattr(Ghost, 'xvfb'):
            try:
                Ghost.xvfb = subprocess.Popen(['Xvfb', ':99'])
            except OSError:
                raise Exception('Xvfb is required to a ghost run outside ' +
                                'an X instance')
            # Export the display only after Xvfb was spawned, so a failed
            # launch does not leave a DISPLAY pointing at a missing server.
            os.environ['DISPLAY'] = ':99'

        if not Ghost._app:
            # reuse a running QApplication if there is one
            Ghost._app = QApplication.instance() or QApplication(['ghost'])
            qInstallMsgHandler(QTMessageProxy(qt_debug))
        for p in plugin_path:
            Ghost._app.addLibraryPath(p)
        QtWebKit.QWebSettings.setMaximumPagesInCache(0)
        QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)

        logger.setLevel(log_level)
示例#9
0
def main():
    """Parse arguments, set up Qt and Phantom, and return Phantom's result.

    The Qt event loop is only entered when ``phantom.execute()`` returns a
    true value.  The return value is ``phantom.returnValue()``.
    """
    args = parseArgs(sys.argv[1:])

    # register an alternative Message Handler
    messageHandler = MessageHandler(args.verbose)
    qInstallMsgHandler(messageHandler.process)

    app = QApplication(sys.argv)

    app.setWindowIcon(QIcon(":/resources/pyphantomjs-icon.png"))
    app.setApplicationName("PyPhantomJS")
    app.setOrganizationName("Umaclan Development")
    app.setOrganizationDomain("www.umaclan.com")
    app.setApplicationVersion(version)

    phantom = Phantom(args)

    # The hook receives this function's local variables wrapped in a Bunch,
    # so the local names above (args, messageHandler, app, phantom) are
    # visible to whatever handles the "Main" action.
    do_action("Main", Bunch(locals()))

    if phantom.execute():
        app.exec_()
    return phantom.returnValue()
示例#10
0
from PyQt4.QtCore import QString, qInstallMsgHandler, qFatal
from PyQt4.QtGui import *

# Restore the default SIGINT action so that a keyboard interrupt (Ctrl+C)
# terminates the program.
import signal

signal.signal(signal.SIGINT, signal.SIG_DFL)

if __name__ == "__main__":
    # Handle all command-line options
    p = argParser()
    args = p.parse_args()

    # register an alternative Message Handler
    messageHandler = MessageHandler(args.verbose)
    qInstallMsgHandler(messageHandler.process)

    if args.upload_file:
        # Collect "tag=file" pairs from the upload_file values.
        item_buffer = {}
        for i in range(len(args.upload_file)):
            item = args.upload_file[i].split("=")
            if len(item) < 2 or not len(item[1]):
                # A value without a non-empty right-hand side: if no pair was
                # collected yet the usage is wrong, otherwise this value and
                # everything after it is taken as the script.
                if len(item_buffer) == 0:
                    p.print_help()
                    sys.exit(1)
                args.script = args.upload_file[i:]
                break
            item_buffer[QString(item[0])] = QString(item[1])
        # every file named in a pair has to exist
        for tag in item_buffer:
            if not os.path.exists(item_buffer[tag]):
                qFatal("No such file or directory: '%s'" % item_buffer[tag])
示例#11
0
from PyQt4.QtGui import QIcon, QApplication

# Restore the default SIGINT action so that a keyboard interrupt (Ctrl+C)
# terminates the program.
import signal
signal.signal(signal.SIGINT, signal.SIG_DFL)

if __name__ == '__main__':
    # Handle all command-line options
    # Arguments the parser does not recognise are kept on args.script_args.
    p = argParser()
    arg_data = p.parse_known_args(sys.argv[1:])
    args = arg_data[0]
    args.script_args = arg_data[1]

    # register an alternative Message Handler
    messageHandler = MessageHandler(args.verbose)
    qInstallMsgHandler(messageHandler.process)

    if args.upload_file:
        # Collect "tag=file" pairs from the upload_file values.
        item_buffer = {}
        for i in range(len(args.upload_file)):
            item = args.upload_file[i].split('=')
            if len(item) < 2 or not len(item[1]):
                # no pair collected before the first malformed value: show
                # the usage text and exit
                if len(item_buffer) == 0:
                    p.print_help()
                    sys.exit(1)

                # this is a bug workaround for argparse.
                # if you call parse_known_args, and you
                # have an --option script arg, the args
                # get jumbled up, and it's inconsistent
                #
示例#12
0
import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl, qInstallMsgHandler
from PyQt4.QtWebKit import QWebPage, QWebSettings

# Install a Qt message handler that accepts any arguments and does nothing.
# Comment this line out to show Qt warnings/errors:
qInstallMsgHandler(lambda *args: None)


class Render(QWebPage):
    """Load one URL in a QWebPage, pass the HTML to a callback and dump it.

    ``cb`` is called as ``cb(url, html)`` once loading has finished and the
    same HTML is written, UTF-8 encoded, to ``dump_file``.
    """

    def __init__(self, cb, dump_file):
        """Create the QApplication and the page; image loading is disabled."""
        # the application object is created before the QWebPage base class
        # is initialised
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.mainFrame().loadFinished.connect(self._loadFinished)
        self.cb = cb
        self.html_dump = dump_file
        self.settings().setAttribute(QWebSettings.AutoLoadImages, False)

    def crawl(self, url):
        """Start loading ``url`` and block in the Qt event loop until it quits."""
        print('Downloading', url)
        self.mainFrame().load(QUrl(url))
        self.app.exec_()

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: report and dump the page, then quit.

        ``result`` is the value emitted by the signal; it is not inspected.
        """
        try:
            frame = self.mainFrame()
            url = str(frame.url().toString())
            html = frame.toHtml()
            self.cb(url, html)
            with open(self.html_dump, 'w', encoding='utf-8') as dump:
                dump.write(html)
        finally:
            # Always leave the event loop, even when the callback or the
            # file write raised; otherwise crawl() would never return.
            self.app.quit()
示例#13
0
文件: ghost.py 项目: mozii/Ghost.py
    def __init__(self,
                 user_agent=default_user_agent,
                 wait_timeout=8,
                 wait_callback=None,
                 log_level=logging.WARNING,
                 display=False,
                 viewport_size=(800, 600),
                 ignore_ssl_errors=True,
                 cache_dir=os.path.join(tempfile.gettempdir(), "ghost.py"),
                 plugins_enabled=False,
                 java_enabled=False,
                 plugin_path=[
                     '/usr/lib/mozilla/plugins',
                 ],
                 download_images=True,
                 qt_debug=False):
        """Set up the Qt application, web page, network manager and view.

        On non-Windows platforms without a ``DISPLAY`` environment variable
        an ``Xvfb`` process is spawned on ``:99`` (at most once; it is kept
        on ``Ghost.xvfb``) and ``DISPLAY`` is pointed at it.

        :param user_agent: stored and set on the page with ``setUserAgent``.
        :param wait_timeout: stored on ``self.wait_timeout``.
        :param wait_callback: stored on ``self.wait_callback``.
        :param log_level: applied to the module ``logger``.
        :param display: when true a ``QWebView`` showing the page is created
            and shown; otherwise ``self.webview`` is ``None``.
        :param viewport_size: passed to ``set_viewport_size`` and used as the
            size hint of the view.
        :param ignore_ssl_errors: stored on ``self.ignore_ssl_errors``.
        :param cache_dir: directory given to the ``QNetworkDiskCache``.
        :param plugins_enabled: value for the ``PluginsEnabled`` setting.
        :param java_enabled: value for the ``JavaEnabled`` setting.
        :param plugin_path: paths added with ``addLibraryPath`` when the
            application is first created.  The default list is never mutated.
        :param download_images: value for the ``AutoLoadImages`` setting.
        :param qt_debug: handed to ``QTMessageProxy`` when the Qt message
            handler is installed.
        :raises Error: if ``Xvfb`` is needed but cannot be started.
        """
        self.http_resources = []

        self.user_agent = user_agent
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.ignore_ssl_errors = ignore_ssl_errors
        self.loaded = True

        if not sys.platform.startswith('win') \
                and 'DISPLAY' not in os.environ \
                and not hasattr(Ghost, 'xvfb'):
            try:
                Ghost.xvfb = subprocess.Popen(['Xvfb', ':99'])
            except OSError:
                raise Error('Xvfb is required to a ghost run outside ' +
                            'an X instance')
            # Export the display only after Xvfb was spawned, so a failed
            # launch does not leave a DISPLAY pointing at a missing server.
            os.environ['DISPLAY'] = ':99'

        self.display = display

        if not Ghost._app:
            # reuse a running QApplication if there is one
            Ghost._app = QApplication.instance() or QApplication(['ghost'])
            qInstallMsgHandler(QTMessageProxy(qt_debug))
            if plugin_path:
                for p in plugin_path:
                    Ghost._app.addLibraryPath(p)

        self.popup_messages = []
        self.page = GhostWebPage(Ghost._app, self)
        QtWebKit.QWebSettings.setMaximumPagesInCache(0)
        QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)
        QtWebKit.QWebSettings.globalSettings().setAttribute(
            QtWebKit.QWebSettings.LocalStorageEnabled, True)

        self.page.setForwardUnsupportedContent(True)
        self.page.settings().setAttribute(QtWebKit.QWebSettings.AutoLoadImages,
                                          download_images)
        self.page.settings().setAttribute(QtWebKit.QWebSettings.PluginsEnabled,
                                          plugins_enabled)
        self.page.settings().setAttribute(QtWebKit.QWebSettings.JavaEnabled,
                                          java_enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        self.page.loadFinished.connect(self._page_loaded)
        self.page.loadStarted.connect(self._page_load_started)
        self.page.unsupportedContent.connect(self._unsupported_content)

        self.manager = self.page.networkAccessManager()
        self.manager.finished.connect(self._request_ended)
        self.manager.sslErrors.connect(self._on_manager_ssl_errors)
        # Cache
        self.cache = QNetworkDiskCache()
        self.cache.setCacheDirectory(cache_dir)
        self.manager.setCache(self.cache)
        # Cookie jar
        self.cookie_jar = QNetworkCookieJar()
        self.manager.setCookieJar(self.cookie_jar)
        # User Agent
        self.page.setUserAgent(self.user_agent)

        # both kinds of authentication request use the same handler
        self.page.networkAccessManager().authenticationRequired\
            .connect(self._authenticate)
        self.page.networkAccessManager().proxyAuthenticationRequired\
            .connect(self._authenticate)

        self.main_frame = self.page.mainFrame()

        logger.setLevel(log_level)

        if self.display:

            class MyQWebView(QtWebKit.QWebView):
                # the view reports the requested viewport size as its hint
                def sizeHint(self):
                    return QSize(*viewport_size)

            self.webview = MyQWebView()
            if plugins_enabled:
                self.webview.settings().setAttribute(
                    QtWebKit.QWebSettings.PluginsEnabled, True)
            if java_enabled:
                self.webview.settings().setAttribute(
                    QtWebKit.QWebSettings.JavaEnabled, True)
            self.webview.setPage(self.page)
            self.webview.show()
        else:
            self.webview = None
示例#14
0
    def __call__(self, msgType, wtf, msg):
        """Forward a Qt message to ``self.logger`` at the matching level.

        ``msgType`` selects the logger method, ``msg`` is the text that is
        logged and ``wtf`` is accepted but not used.  A message type that is
        not one of the four known constants is logged as a warning instead
        of raising ``KeyError`` inside the handler.
        """
        # 'warning' rather than the deprecated 'warn' alias, which newer
        # Python versions no longer provide on logging.Logger.
        # NOTE(review): assumes self.logger is a standard logging.Logger.
        level = {
            QtDebugMsg: 'debug',
            QtCriticalMsg: 'critical',
            QtFatalMsg: 'fatal',
        }.get(msgType, 'warning')
        getattr(self.logger, level)(msg)


# Build the logger that QTMessageProxy writes to and install the proxy as
# the Qt message handler.
# NOTE(review): the meaning of configure_logger's arguments is not visible
# here -- presumably name, prefix, level and handler; confirm.
qt_logger = configure_logger('qt', 'QT', logging.DEBUG, logging.StreamHandler())
qInstallMsgHandler(QTMessageProxy(qt_logger))


class WKitWebView(QWebView):
    """QWebView that quits its application on close and prefers 800x600."""

    def setApplication(self, app):
        """Remember the application object that ``closeEvent`` will quit."""
        self.app = app

    def closeEvent(self, event):
        """Quit the stored application when the view receives a close event."""
        self.app.quit()

    def sizeHint(self):
        """Return the preferred size of the view, 800 by 600."""
        width, height = 800, 600
        return QSize(width, height)


class WKitWebPage(QWebPage):
示例#15
0
try:
    from queue import Empty, Queue
except ImportError:
    from Queue import Empty, Queue
from uuid import uuid4

from core.helpers import flatten_settings_definition, make_list
from core.proxy import FrameData, Proxy


def null_message_handler(*args, **kwargs):
    """Accept any arguments, do nothing and return ``None``."""
    return None


# disable qt related messages by installing the do-nothing handler
qInstallMsgHandler(null_message_handler)


class WebPage(QWebPage):
    """
    QWebPage that reports Javascript console messages through ``onLog``.

    Adapted from http://www.tylerlesmann.com/2009/oct/01/web-scraping-pyqt4/
    """
    # emitted as (level, text) for every console message
    onLog = pyqtSignal(int, str)

    def javaScriptConsoleMessage(self, message, lineNumber, sourceID):
        """Emit the console message on ``onLog`` with the ``DEBUG`` level."""
        text = 'Javascript:%s:%s: %s' % (sourceID, lineNumber, message)
        self.onLog.emit(DEBUG, text)


示例#16
0
    def __init__(self, user_agent=default_user_agent, wait_timeout=8,
            wait_callback=None, log_level=logging.WARNING, display=False,
            viewport_size=(800, 600), ignore_ssl_errors=True,
            cache_dir=os.path.join(tempfile.gettempdir(), "ghost.py"),
            plugins_enabled=False, java_enabled=False,
            plugin_path=['/usr/lib/mozilla/plugins',],
            download_images=True, qt_debug=False):
        """Set up the Qt application, web page, network manager and view.

        On non-Windows platforms without a ``DISPLAY`` environment variable
        an ``Xvfb`` process is spawned on ``:99`` (at most once; it is kept
        on ``Ghost.xvfb``) and ``DISPLAY`` is pointed at it.

        :param user_agent: stored and set on the page with ``setUserAgent``.
        :param wait_timeout: stored on ``self.wait_timeout``.
        :param wait_callback: stored on ``self.wait_callback``.
        :param log_level: applied to the module ``logger``.
        :param display: when true a ``QWebView`` showing the page is created
            and shown; otherwise ``self.webview`` is ``None``.
        :param viewport_size: passed to ``set_viewport_size`` and used as the
            size hint of the view.
        :param ignore_ssl_errors: stored on ``self.ignore_ssl_errors``.
        :param cache_dir: directory given to the ``QNetworkDiskCache``.
        :param plugins_enabled: value for the ``PluginsEnabled`` setting.
        :param java_enabled: value for the ``JavaEnabled`` setting.
        :param plugin_path: paths added with ``addLibraryPath`` when the
            application is first created.  The default list is never mutated.
        :param download_images: value for the ``AutoLoadImages`` setting.
        :param qt_debug: handed to ``QTMessageProxy`` when the Qt message
            handler is installed.
        :raises Error: if ``Xvfb`` is needed but cannot be started.
        """
        self.http_resources = []

        self.user_agent = user_agent
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.ignore_ssl_errors = ignore_ssl_errors
        self.loaded = True

        if not sys.platform.startswith('win') \
                and 'DISPLAY' not in os.environ \
                and not hasattr(Ghost, 'xvfb'):
            try:
                Ghost.xvfb = subprocess.Popen(['Xvfb', ':99'])
            except OSError:
                raise Error('Xvfb is required to a ghost run outside ' +
                            'an X instance')
            # Export the display only after Xvfb was spawned, so a failed
            # launch does not leave a DISPLAY pointing at a missing server.
            os.environ['DISPLAY'] = ':99'

        self.display = display

        if not Ghost._app:
            # reuse a running QApplication if there is one
            Ghost._app = QApplication.instance() or QApplication(['ghost'])
            qInstallMsgHandler(QTMessageProxy(qt_debug))
            if plugin_path:
                for p in plugin_path:
                    Ghost._app.addLibraryPath(p)

        self.popup_messages = []
        self.page = GhostWebPage(Ghost._app, self)
        QtWebKit.QWebSettings.setMaximumPagesInCache(0)
        QtWebKit.QWebSettings.setObjectCacheCapacities(0, 0, 0)
        QtWebKit.QWebSettings.globalSettings().setAttribute(QtWebKit.QWebSettings.LocalStorageEnabled, True)

        self.page.setForwardUnsupportedContent(True)
        self.page.settings().setAttribute(QtWebKit.QWebSettings.AutoLoadImages, download_images)
        self.page.settings().setAttribute(QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled)
        self.page.settings().setAttribute(QtWebKit.QWebSettings.JavaEnabled, java_enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        self.page.loadFinished.connect(self._page_loaded)
        self.page.loadStarted.connect(self._page_load_started)
        self.page.unsupportedContent.connect(self._unsupported_content)

        self.manager = self.page.networkAccessManager()
        self.manager.finished.connect(self._request_ended)
        self.manager.sslErrors.connect(self._on_manager_ssl_errors)
        # Cache
        self.cache = QNetworkDiskCache()
        self.cache.setCacheDirectory(cache_dir)
        self.manager.setCache(self.cache)
        # Cookie jar
        self.cookie_jar = QNetworkCookieJar()
        self.manager.setCookieJar(self.cookie_jar)
        # User Agent
        self.page.setUserAgent(self.user_agent)

        # both kinds of authentication request use the same handler
        self.page.networkAccessManager().authenticationRequired\
            .connect(self._authenticate)
        self.page.networkAccessManager().proxyAuthenticationRequired\
            .connect(self._authenticate)

        self.main_frame = self.page.mainFrame()

        logger.setLevel(log_level)

        if self.display:
            class MyQWebView(QtWebKit.QWebView):
                # the view reports the requested viewport size as its hint
                def sizeHint(self):
                    return QSize(*viewport_size)
            self.webview = MyQWebView()
            if plugins_enabled:
                self.webview.settings().setAttribute(QtWebKit.QWebSettings.PluginsEnabled, True)
            if java_enabled:
                self.webview.settings().setAttribute(QtWebKit.QWebSettings.JavaEnabled, True)
            self.webview.setPage(self.page)
            self.webview.show()
        else:
            self.webview = None
示例#17
0
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *
from PyQt4.QtCore import QUrl

# keep Qt messages off the terminal
from PyQt4.QtCore import qInstallMsgHandler
from PyQt4.Qt import QtMsgType


def myQtMsgHandler(msg_type, msg_string):
    """Discard a Qt message: both arguments are ignored, ``None`` is returned."""
    return None


# install the do-nothing handler so Qt messages are discarded
qInstallMsgHandler(myQtMsgHandler)

from scrapex import common, http, agent

# module-level Qt application, created at import time from the process arguments
app = QApplication(sys.argv)


class WebView(QWebView):
    def __init__(self,
                 show=False,
                 timeout=30,
                 image=False,
                 js=True,
                 **options):

        QWebView.__init__(self)