Exemplo n.º 1
0
def run_webui(g=g):
    """Wire the web UI app to the databases on ``g`` and call ``app.run()``.

    The ``taskdb``, ``projectdb`` and ``resultdb`` attributes of ``g`` are
    copied into ``app.config``, an XML-RPC proxy for
    ``http://localhost:23333`` is stored under ``'scheduler_rpc'``, the app is
    published as ``g.app`` and ``app.run()`` is called with its defaults.

    Args:
        g: object providing the ``taskdb``, ``projectdb`` and ``resultdb``
            attributes; defaults to the module-level ``g``.
    """
    try:
        import xmlrpclib  # Python 2 module name
    except ImportError:
        # The module was renamed to xmlrpc.client in Python 3.
        import xmlrpc.client as xmlrpclib
    from pyspider.webui.app import app

    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['scheduler_rpc'] = xmlrpclib.ServerProxy('http://localhost:23333')
    g.app = app
    app.run()
Exemplo n.º 2
0
def webui(ctx,
          host,
          port,
          cdn,
          scheduler_rpc,
          fetcher_rpc,
          max_rate,
          max_burst,
          username,
          password,
          need_auth,
          app=app):
    """Configure the web UI application and run it (or return it in testing).

    Args:
        ctx: context whose ``obj`` attribute supplies ``taskdb``,
            ``projectdb``, ``resultdb``, ``phantomjs_proxy``, ``debug``,
            ``instances`` and a ``get`` method.
        host: forwarded to ``app.run``.
        port: forwarded to ``app.run``.
        cdn: stored as ``app.config['cdn']``.
        scheduler_rpc: an RPC client object, a URL string (turned into a
            client with ``connect_rpc``), or ``None``. With ``None`` the
            address is taken from the ``SCHEDULER_PORT_23333_TCP`` environment
            variable when ``SCHEDULER_NAME`` is set, otherwise
            ``http://localhost:23333/`` is used.
        fetcher_rpc: an RPC client object, a URL string (turned into a client
            with ``connect_rpc``), or ``None``. With ``None`` a local
            ``Fetcher`` is created and used for ``app.config['fetch']``.
        max_rate: stored as ``app.config['max_rate']`` when truthy.
        max_burst: stored as ``app.config['max_burst']`` when truthy.
        username: stored as ``app.config['webui_username']`` when truthy.
        password: stored as ``app.config['webui_password']`` when truthy.
        need_auth: accepted but not read anywhere in this function.
        app: application object to configure; defaults to the module-level
            ``app``.

    Returns:
        ``app`` when ``g.get('testing_mode')`` is truthy; otherwise ``None``
        once ``app.run(host=host, port=port)`` has returned.
    """
    g = ctx.obj
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['cdn'] = cdn

    # Optional settings only override the app's existing config when given.
    if max_rate:
        app.config['max_rate'] = max_rate
    if max_burst:
        app.config['max_burst'] = max_burst
    if username:
        app.config['webui_username'] = username
    if password:
        app.config['webui_password'] = password

    # fetcher rpc
    if isinstance(fetcher_rpc, six.string_types):
        fetcher_rpc = connect_rpc(ctx, None, fetcher_rpc)
    if fetcher_rpc is None:
        # ``async`` is a reserved word from Python 3.7 on, so spelling it as a
        # literal keyword argument is a SyntaxError there. Unpacking a dict
        # passes exactly the same keyword and parses on every version.
        fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
        fetcher.phantomjs_proxy = g.phantomjs_proxy
        app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    else:
        import umsgpack
        app.config['fetch'] = lambda x: umsgpack.unpackb(
            fetcher_rpc.fetch(x).data)

    # scheduler rpc
    if isinstance(scheduler_rpc, six.string_types):
        scheduler_rpc = connect_rpc(ctx, None, scheduler_rpc)
    if scheduler_rpc is None and os.environ.get('SCHEDULER_NAME'):
        # Drop the leading 'tcp://' from the environment value to build an
        # http URL (value presumably looks like tcp://host:port).
        app.config['scheduler_rpc'] = connect_rpc(
            ctx, None, 'http://%s/' %
            (os.environ['SCHEDULER_PORT_23333_TCP'][len('tcp://'):]))
    elif scheduler_rpc is None:
        app.config['scheduler_rpc'] = connect_rpc(ctx, None,
                                                  'http://localhost:23333/')
    else:
        app.config['scheduler_rpc'] = scheduler_rpc

    app.debug = g.debug
    g.instances.append(app)
    if g.get('testing_mode'):
        # Hand the configured app back to the caller instead of serving.
        return app

    app.run(host=host, port=port)
Exemplo n.º 3
0
def run_webui(g=g):
    """Attach the databases on ``g`` to the web UI app and run it.

    Stores ``g.taskdb``, ``g.projectdb`` and ``g.resultdb`` in ``app.config``
    under the same names, stores an XML-RPC ``ServerProxy`` for
    ``http://localhost:23333`` under ``'scheduler_rpc'``, assigns the app to
    ``g.app`` and finally calls ``app.run()`` with no arguments.

    Args:
        g: holder of the ``taskdb``, ``projectdb`` and ``resultdb``
            attributes; defaults to the module-level ``g``.
    """
    # Python 3 ships the XML-RPC client as xmlrpc.client; fall back to it when
    # the Python 2 module name is not importable.
    try:
        import xmlrpclib
    except ImportError:
        import xmlrpc.client as xmlrpclib
    from pyspider.webui.app import app

    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['scheduler_rpc'] = xmlrpclib.ServerProxy(
        'http://localhost:23333')
    g.app = app
    app.run()
Exemplo n.º 4
0
def webui(ctx, host, port, cdn, scheduler_rpc, fetcher_rpc,
          max_rate, max_burst, username, password):
    """Configure the pyspider web UI app and run it (or return it in testing).

    Args:
        ctx: context whose ``obj`` attribute supplies ``taskdb``,
            ``projectdb``, ``resultdb``, ``phantomjs_proxy``, ``debug`` and a
            ``get`` method.
        host: forwarded to ``app.run``.
        port: forwarded to ``app.run``.
        cdn: stored as ``app.config['cdn']``.
        scheduler_rpc: an RPC client object, a URL string (turned into a
            client with ``connect_rpc``), or ``None``. With ``None`` the
            address is taken from the ``SCHEDULER_PORT_23333_TCP`` environment
            variable when ``SCHEDULER_NAME`` is set, otherwise
            ``http://localhost:23333/`` is used.
        fetcher_rpc: an RPC client object, a URL string (turned into a client
            with ``connect_rpc``), or ``None``. With ``None`` a local
            ``Fetcher`` is created and used for ``app.config['fetch']``.
        max_rate: stored as ``app.config['max_rate']`` when truthy.
        max_burst: stored as ``app.config['max_burst']`` when truthy.
        username: stored as ``app.config['webui_username']`` when truthy.
        password: stored as ``app.config['webui_password']`` when truthy.

    Returns:
        The app when ``g.get('testing_mode')`` is truthy; otherwise ``None``
        once ``app.run(host=host, port=port)`` has returned.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere
    # so the isinstance checks below do not raise NameError.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    g = ctx.obj
    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['cdn'] = cdn

    # Optional settings only override the app's existing config when given.
    if max_rate:
        app.config['max_rate'] = max_rate
    if max_burst:
        app.config['max_burst'] = max_burst
    if username:
        app.config['webui_username'] = username
    if password:
        app.config['webui_password'] = password

    # fetcher rpc
    if isinstance(fetcher_rpc, string_types):
        fetcher_rpc = connect_rpc(ctx, None, fetcher_rpc)
    if fetcher_rpc is None:
        from pyspider.fetcher.tornado_fetcher import Fetcher
        # ``async`` is a reserved word from Python 3.7 on, so spelling it as a
        # literal keyword argument is a SyntaxError there. Unpacking a dict
        # passes exactly the same keyword and parses on every version.
        fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
        fetcher.phantomjs_proxy = g.phantomjs_proxy
        app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    else:
        import umsgpack
        app.config['fetch'] = lambda x: umsgpack.unpackb(fetcher_rpc.fetch(x).data)

    # scheduler rpc
    if isinstance(scheduler_rpc, string_types):
        scheduler_rpc = connect_rpc(ctx, None, scheduler_rpc)
    if scheduler_rpc is None and os.environ.get('SCHEDULER_NAME'):
        # Drop the leading 'tcp://' from the environment value to build an
        # http URL (value presumably looks like tcp://host:port).
        app.config['scheduler_rpc'] = connect_rpc(ctx, None, 'http://%s/' % (
            os.environ['SCHEDULER_PORT_23333_TCP'][len('tcp://'):]))
    elif scheduler_rpc is None:
        app.config['scheduler_rpc'] = connect_rpc(ctx, None, 'http://localhost:23333/')
    else:
        app.config['scheduler_rpc'] = scheduler_rpc

    app.debug = g.debug
    if g.get('testing_mode'):
        # Hand the configured app back to the caller instead of serving.
        return app

    app.run(host=host, port=port)
Exemplo n.º 5
0
def run_webui(g=g):
    """Configure the web UI app from ``g`` and the environment, then run it.

    A local ``Fetcher`` is created and exposed to the app as
    ``app.config['fetch']``; the databases and scheduler RPC client are copied
    from ``g``. When ``g.demo_mode`` is truthy the rate/burst limits are set
    to ``0.2`` / ``3.0``. When the ``WEBUI_USERNAME`` environment variable is
    present it, together with ``WEBUI_PASSWORD`` (default ``''``), is stored
    in the app config. ``app.debug`` is taken from ``g.debug`` unless
    ``g.all_in_one`` is truthy. The app is then run on ``g.webui_host`` /
    ``g.webui_port``.

    Args:
        g: object providing ``phantomjs_proxy``, ``taskdb``, ``projectdb``,
            ``resultdb``, ``scheduler_rpc``, ``demo_mode``, ``debug``,
            ``webui_host`` and ``webui_port``; defaults to the module-level
            ``g``.
    """
    from pyspider.fetcher.tornado_fetcher import Fetcher
    # ``async`` is a reserved word from Python 3.7 on, so spelling it as a
    # literal keyword argument is a SyntaxError there. Unpacking a dict passes
    # exactly the same keyword and parses on every version.
    fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    app.config['scheduler_rpc'] = g.scheduler_rpc
    # NOTE: a CDN base can be configured here if wanted, e.g.
    # app.config['cdn'] = '//cdnjs.cloudflare.com/ajax/libs/'
    if g.demo_mode:
        app.config['max_rate'] = 0.2
        app.config['max_burst'] = 3.0
    if 'WEBUI_USERNAME' in os.environ:
        app.config['webui_username'] = os.environ['WEBUI_USERNAME']
        app.config['webui_password'] = os.environ.get('WEBUI_PASSWORD', '')
    if not getattr(g, 'all_in_one', False):
        app.debug = g.debug
    app.run(host=g.webui_host, port=g.webui_port)
Exemplo n.º 6
0
def run_webui(g=g):
    """Set up the web UI app from ``g`` and environment variables and run it.

    Steps performed:

    * build a local ``Fetcher`` (with ``g.phantomjs_proxy``) and publish its
      ``fetch`` result as ``app.config['fetch']``;
    * copy ``taskdb``, ``projectdb``, ``resultdb`` and ``scheduler_rpc`` from
      ``g`` into ``app.config``;
    * if ``g.demo_mode`` is truthy, set ``max_rate`` to ``0.2`` and
      ``max_burst`` to ``3.0``;
    * if ``WEBUI_USERNAME`` is in the environment, store it and
      ``WEBUI_PASSWORD`` (default ``''``) in the config;
    * set ``app.debug`` from ``g.debug`` unless ``g.all_in_one`` is truthy;
    * call ``app.run`` with ``g.webui_host`` and ``g.webui_port``.

    Args:
        g: settings/holder object read as described above; defaults to the
            module-level ``g``.
    """
    from pyspider.fetcher.tornado_fetcher import Fetcher
    # Passing ``async`` through a dict keeps this line parseable on
    # Python 3.7+, where ``async`` is a reserved keyword; the call receives
    # the same keyword argument as before.
    fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    app.config['scheduler_rpc'] = g.scheduler_rpc
    # NOTE: a CDN base can be configured here if wanted, e.g.
    # app.config['cdn'] = '//cdnjs.cloudflare.com/ajax/libs/'
    if g.demo_mode:
        app.config['max_rate'] = 0.2
        app.config['max_burst'] = 3.0
    if 'WEBUI_USERNAME' in os.environ:
        app.config['webui_username'] = os.environ['WEBUI_USERNAME']
        app.config['webui_password'] = os.environ.get('WEBUI_PASSWORD', '')
    if not getattr(g, 'all_in_one', False):
        app.debug = g.debug
    app.run(host=g.webui_host, port=g.webui_port)
Exemplo n.º 7
0
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show):
    """Start every component against a 'bench' project and serve the web UI.

    The function registers a project named ``bench`` in ``g.projectdb``,
    raises the logging threshold to ERROR, starts the requested number of
    result workers, processors and fetchers plus one scheduler through the
    chosen runner, then invokes ``webui`` with ``testing_mode`` enabled and
    uses the object it returns to POST to ``/run`` and to serve on
    ``127.0.0.1:5000``. Once serving ends, every entry of ``g.instances`` is
    asked to quit and all started workers are joined.

    Args:
        ctx: context whose ``obj`` holds the shared state and whose ``invoke``
            runs the component commands.
        fetcher_num: number of fetchers to start.
        processor_num: number of processors to start.
        result_worker_num: number of result workers to start; ``0`` also sets
            ``g['processor2result']`` to ``None``.
        run_in: ``'subprocess'`` selects ``run_in_subprocess`` (unless
            ``os.name`` is ``'nt'``); any other value selects
            ``run_in_thread``.
        total: substituted into the bench script template as ``total``.
        show: substituted into the bench script template as ``show``.

    Raises:
        AssertionError: if the POST to ``/run`` does not answer with 200.
    """
    from pyspider.libs import bench
    # Not referenced below; presumably imported for its import-time effects
    # on the web UI -- verify before removing.
    from pyspider.webui import bench_test

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    use_subprocess = run_in == 'subprocess' and os.name != 'nt'
    run_in = run_in_subprocess if use_subprocess else run_in_thread

    g.projectdb.insert('bench', {
        'name': 'bench',
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': 100000000000000,
        'burst': 10000000000000000,
        'updatetime': time.time()
    })

    # disable log: root logger first (name None), then each component logger
    for logger_name in (None, 'scheduler', 'fetcher', 'processor', 'result', 'webui'):
        logging.getLogger(logger_name).setLevel(logging.ERROR)

    workers = []

    # result worker
    result_worker_options = g.config.get('result_worker', {})
    workers.extend(
        run_in(ctx.invoke, result_worker,
               ResultWorker=bench.BenchResultWorker, **result_worker_options)
        for _ in range(result_worker_num)
    )

    # processor
    processor_options = g.config.get('processor', {})
    workers.extend(
        run_in(ctx.invoke, processor,
               Processor=bench.BenchProcessor, **processor_options)
        for _ in range(processor_num)
    )

    # fetcher
    fetcher_options = g.config.get('fetcher', {})
    fetcher_options.setdefault('xmlrpc_host', '127.0.0.1')
    workers.extend(
        run_in(ctx.invoke, fetcher,
               Fetcher=bench.BenchFetcher, **fetcher_options)
        for _ in range(fetcher_num)
    )

    # scheduler
    scheduler_options = g.config.get('scheduler', {})
    scheduler_options.setdefault('xmlrpc_host', '127.0.0.1')
    workers.append(run_in(ctx.invoke, scheduler,
                          Scheduler=bench.BenchScheduler, **scheduler_options))

    # running webui in main thread to make it exitable
    webui_options = g.config.get('webui', {})
    scheduler_port = g.config.get('scheduler', {}).get('xmlrpc_port', 23333)
    webui_options.setdefault('scheduler_rpc',
                             'http://localhost:%s/' % scheduler_port)
    g['testing_mode'] = True
    app = ctx.invoke(webui, **webui_options)

    # run project
    client = app.test_client()
    response = client.post('/run', data={'project': 'bench'})
    assert response.status_code == 200, 'run project error'

    app.run('127.0.0.1', 5000)

    for instance in g.instances:
        instance.quit()

    for worker in workers:
        worker.join()