Exemplo n.º 1
0
    def tearDownClass(self):
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        shutil.rmtree('./data/tests', ignore_errors=True)
Exemplo n.º 2
0
    def tearDownClass(self):
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        shutil.rmtree('./data/tests', ignore_errors=True)
Exemplo n.º 3
0
    def tearDownClass(self):
        for each in self.ctx.obj.instances:
            each.quit()
        self.xmlrpc_thread.join()
        self.scheduler_thread.join()
        time.sleep(1)

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)

        shutil.rmtree('./data/tests', ignore_errors=True)
Exemplo n.º 4
0
    def tearDownClass(self):
        if self.process.is_alive():
            self.rpc._quit()
            self.process.join(5)
        self.xmlrpc_thread.join()
        assert not self.process.is_alive()
        shutil.rmtree('./data/tests', ignore_errors=True)
        time.sleep(1)

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(self.scheduler_xmlrpc_port)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
Exemplo n.º 5
0
    def tearDownClass(self):
        if self.process.is_alive():
            self.rpc._quit()
            self.process.join(5)
        self.xmlrpc_thread.join()
        assert not self.process.is_alive()
        shutil.rmtree('./data/tests', ignore_errors=True)
        time.sleep(1)

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(self.scheduler_xmlrpc_port)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
Exemplo n.º 6
0
    def tearDownClass(self):
        for each in self.ctx.obj.instances:
            each.quit()
        self.xmlrpc_thread.join()
        self.scheduler_thread.join()
        time.sleep(1)

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)

        shutil.rmtree('./data/tests', ignore_errors=True)
Exemplo n.º 7
0
    def tearDownClass(self):
        self.proxy_thread.terminate()
        self.proxy_thread.wait()
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        self.rpc._quit()
        self.thread.join()

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        time.sleep(1)
Exemplo n.º 8
0
    def tearDownClass(self):
        self.proxy_thread.terminate()
        self.proxy_thread.wait()
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        self.rpc._quit()
        self.thread.join()

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        time.sleep(1)
Exemplo n.º 9
0
    def tearDownClass(self):
        for each in self.ctx.obj.instances:
            each.quit()
        time.sleep(1)

        for thread in self.threads:
            thread.join()

        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        shutil.rmtree('./data/tests', ignore_errors=True)
Exemplo n.º 10
0
    def tearDownClass(self):
        for each in self.ctx.obj.instances:
            each.quit()
        time.sleep(1)

        for thread in self.threads:
            thread.join()

        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        shutil.rmtree('./data/tests', ignore_errors=True)
Exemplo n.º 11
0
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show,
          taskdb_bench, message_queue_bench, all_bench):
    """
    Run Benchmark test.
    In bench mode, in-memory sqlite database is used instead of on-disk sqlite database.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test  # flake8: noqa

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    if run_in == 'subprocess' and os.name != 'nt':
        run_in = utils.run_in_subprocess
    else:
        run_in = utils.run_in_thread

    all_test = not taskdb_bench and not message_queue_bench and not all_bench

    # test taskdb
    if all_test or taskdb_bench:
        bench.bench_test_taskdb(g.taskdb)
    # test message queue
    if all_test or message_queue_bench:
        bench.bench_test_message_queue(g.scheduler2fetcher)
    # test all
    if not all_test and not all_bench:
        return

    project_name = 'bench'

    def clear_project():
        g.taskdb.drop(project_name)
        g.resultdb.drop(project_name)

    clear_project()

    # disable log
    logging.getLogger().setLevel(logging.ERROR)
    logging.getLogger('scheduler').setLevel(logging.ERROR)
    logging.getLogger('fetcher').setLevel(logging.ERROR)
    logging.getLogger('processor').setLevel(logging.ERROR)
    logging.getLogger('result').setLevel(logging.ERROR)
    logging.getLogger('webui').setLevel(logging.ERROR)
    logging.getLogger('werkzeug').setLevel(logging.ERROR)

    try:
        threads = []

        # result worker
        result_worker_config = g.config.get('result_worker', {})
        for i in range(result_worker_num):
            threads.append(run_in(ctx.invoke, result_worker,
                                  result_cls='pyspider.libs.bench.BenchResultWorker',
                                  **result_worker_config))

        # processor
        processor_config = g.config.get('processor', {})
        for i in range(processor_num):
            threads.append(run_in(ctx.invoke, processor,
                                  processor_cls='pyspider.libs.bench.BenchProcessor',
                                  **processor_config))

        # fetcher
        fetcher_config = g.config.get('fetcher', {})
        fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
        for i in range(fetcher_num):
            threads.append(run_in(ctx.invoke, fetcher,
                                  fetcher_cls='pyspider.libs.bench.BenchFetcher',
                                  **fetcher_config))

        # webui
        webui_config = g.config.get('webui', {})
        webui_config.setdefault('scheduler_rpc', 'http://127.0.0.1:%s/'
                                % g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
        threads.append(run_in(ctx.invoke, webui, **webui_config))

        # scheduler
        scheduler_config = g.config.get('scheduler', {})
        scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
        scheduler_config.setdefault('xmlrpc_port', 23333)
        threads.append(run_in(ctx.invoke, scheduler,
                              scheduler_cls='pyspider.libs.bench.BenchScheduler',
                              **scheduler_config))
        scheduler_rpc = connect_rpc(ctx, None,
                                    'http://%(xmlrpc_host)s:%(xmlrpc_port)s/' % scheduler_config)

        for _ in range(20):
            if utils.check_port_open(23333):
                break
            time.sleep(1)

        scheduler_rpc.newtask({
            "project": project_name,
            "taskid": "on_start",
            "url": "data:,on_start",
            "fetch": {
                "save": {"total": total, "show": show}
            },
            "process": {
                "callback": "on_start",
            },
        })

        # wait bench test finished
        while True:
            time.sleep(1)
            if scheduler_rpc.size() == 0:
                break
    finally:
        # exit components run in threading
        for each in g.instances:
            each.quit()

        # exit components run in subprocess
        for each in threads:
            if hasattr(each, 'terminate'):
                each.terminate()
            each.join(1)

        clear_project()
Exemplo n.º 12
0
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show,
          taskdb_bench, message_queue_bench, all_bench):
    """
    Run Benchmark test.
    In bench mode, in-memory sqlite database is used instead of on-disk sqlite database.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test  # flake8: noqa

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    if run_in == 'subprocess' and os.name != 'nt':
        run_in = utils.run_in_subprocess
    else:
        run_in = utils.run_in_thread

    all_test = not taskdb_bench and not message_queue_bench and not all_bench

    # test taskdb
    if all_test or taskdb_bench:
        bench.bench_test_taskdb(g.taskdb)
    # test message queue
    if all_test or message_queue_bench:
        bench.bench_test_message_queue(g.scheduler2fetcher)
    # test all
    if not all_test and not all_bench:
        return

    project_name = '__bench_test__'

    def clear_project():
        g.taskdb.drop(project_name)
        g.projectdb.drop(project_name)
        g.resultdb.drop(project_name)

    clear_project()
    g.projectdb.insert(project_name, {
        'name': project_name,
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': total,
        'burst': total,
        'updatetime': time.time()
    })

    # disable log
    logging.getLogger().setLevel(logging.ERROR)
    logging.getLogger('scheduler').setLevel(logging.ERROR)
    logging.getLogger('fetcher').setLevel(logging.ERROR)
    logging.getLogger('processor').setLevel(logging.ERROR)
    logging.getLogger('result').setLevel(logging.ERROR)
    logging.getLogger('webui').setLevel(logging.ERROR)
    logging.getLogger('werkzeug').setLevel(logging.ERROR)

    try:
        threads = []

        # result worker
        result_worker_config = g.config.get('result_worker', {})
        for i in range(result_worker_num):
            threads.append(run_in(ctx.invoke, result_worker,
                                  result_cls='pyspider.libs.bench.BenchResultWorker',
                                  **result_worker_config))

        # processor
        processor_config = g.config.get('processor', {})
        for i in range(processor_num):
            threads.append(run_in(ctx.invoke, processor,
                                  processor_cls='pyspider.libs.bench.BenchProcessor',
                                  **processor_config))

        # fetcher
        fetcher_config = g.config.get('fetcher', {})
        fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
        for i in range(fetcher_num):
            threads.append(run_in(ctx.invoke, fetcher,
                                  fetcher_cls='pyspider.libs.bench.BenchFetcher',
                                  **fetcher_config))

        # webui
        webui_config = g.config.get('webui', {})
        webui_config.setdefault('scheduler_rpc', 'http://127.0.0.1:%s/'
                                % g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
        threads.append(run_in(ctx.invoke, webui, **webui_config))

        # scheduler
        scheduler_config = g.config.get('scheduler', {})
        scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
        scheduler_config.setdefault('xmlrpc_port', 23333)
        threads.append(run_in(ctx.invoke, scheduler,
                              scheduler_cls='pyspider.libs.bench.BenchScheduler',
                              **scheduler_config))
        scheduler_rpc = connect_rpc(ctx, None,
                                    'http://%(xmlrpc_host)s:%(xmlrpc_port)s/' % scheduler_config)

        for _ in range(20):
            if utils.check_port_open(23333):
                break
            time.sleep(1)

        scheduler_rpc.newtask({
            "project": project_name,
            "taskid": "on_start",
            "url": "data:,on_start",
            "process": {
                "callback": "on_start",
            },
        })

        # wait bench test finished
        while True:
            time.sleep(1)
            if scheduler_rpc.size() == 0:
                break
    finally:
        # exit components run in threading
        for each in g.instances:
            each.quit()

        # exit components run in subprocess
        for each in threads:
            if hasattr(each, 'terminate'):
                each.terminate()
            each.join(1)

        clear_project()