Example #1
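# Hedged sketch of the imports this example appears to rely on. Only the standard-library
# ones are certain; ThreadPool, IteratorThreadTask, TestPerformanceThreadTask, TestBase and
# add_task_chain are assumed to come from the surrounding async test suite.
import sys
import time
from multiprocessing import cpu_count
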
class TestThreadPoolPerformance(TestBase):
	
	max_threads = cpu_count()
	
	def test_base(self):
		# create a dependency network, and see how the performance changes
		# when adjusting the amount of threads
		pool = ThreadPool(0)
		ni = 1000				# number of items to process
		print(self.max_threads)
		for num_threads in range(self.max_threads*2 + 1):
			pool.set_size(num_threads)
			for num_transformers in (1, 5, 10):
				for read_mode in range(2):
					ts, rcs = add_task_chain(pool, ni, count=num_transformers, 
												feedercls=IteratorThreadTask, 
												transformercls=TestPerformanceThreadTask, 
												include_verifier=False)
					
					mode_info = "read(0)"
					if read_mode == 1:
						mode_info = "read(1) * %i" % ni
					# END mode info
					fmt = "Threadcount=%%i: Produced %%i items using %s in %%i transformations in %%f s (%%f items / s)" % mode_info
					reader = rcs[-1]
					st = time.time()
					if read_mode == 1:
						for i in range(ni):
							assert len(reader.read(1)) == 1
						# END for each item to read
					else:
						assert len(reader.read(0)) == ni
					# END handle read mode
					elapsed = time.time() - st
					print(fmt % (num_threads, ni, num_transformers, elapsed, ni / elapsed), file=sys.stderr)
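
# A minimal, self-contained sketch of the same measurement idea using only the standard
# library (concurrent.futures.ThreadPoolExecutor) instead of the ThreadPool under test.
# The function name, the trivial work item and the thread-count sweep are illustrative
# assumptions, not part of the test suite above.
import sys
import time
from concurrent.futures import ThreadPoolExecutor

def measure_throughput(num_threads, num_items=1000):
    # push num_items trivial work items through a pool and report items / s on stderr
    with ThreadPoolExecutor(max_workers=max(num_threads, 1)) as executor:
        st = time.time()
        results = list(executor.map(lambda i: i * i, range(num_items)))
        elapsed = time.time() - st
    assert len(results) == num_items
    sys.stderr.write("Threadcount=%i: %i items in %f s (%f items / s)\n"
                     % (num_threads, num_items, elapsed, num_items / max(elapsed, 1e-9)))

# example usage: sweep a few thread counts, mirroring the loop in test_base above
# for t in (1, 2, 4, 8):
#     measure_throughput(t)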
Example #2
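# Hedged sketch of the imports this example appears to rely on. Only the standard-library
# ones are certain; TestBase, PoolReader, make_iterator_task and TestFailureThreadTask are
# assumed to come from the surrounding async test suite.
import sys
import time
from multiprocessing import cpu_count
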
class TestThreadPool(TestBase):

    max_threads = cpu_count()

    def _assert_single_task(self, p, async_mode=False):
        """Performs testing in a synchronized environment"""
        sys.stderr.write(
            "Threadpool: Starting single task (async = %i) with %i threads\n" %
            (async_mode, p.size()))
        null_tasks = p.num_tasks()  # in case we had some before

        # add a simple task
        # it iterates n items
        ni = 1000
        assert ni % 2 == 0, "ni needs to be divisible by 2"
        assert ni % 4 == 0, "ni needs to be divisible by 4"

        make_task = lambda *args, **kwargs: make_iterator_task(
            ni, *args, **kwargs)

        task = make_task()

        assert p.num_tasks() == null_tasks
        rc = p.add_task(task)
        assert p.num_tasks() == 1 + null_tasks
        assert isinstance(rc, PoolReader)
        assert task._out_writer is not None

        # pull the result completely - we should get one task, which calls its
        # function once. In sync mode, the order matches
        print("read(0)")
        items = rc.read()
        assert len(items) == ni
        task._assert(1, ni)
        if not async_mode:
            assert items[0] == 0 and items[-1] == ni - 1

        # as the task is done, it should have been removed - we have read everything
        assert task.is_done()
        del rc
        assert p.num_tasks() == null_tasks
        task = make_task()

        # pull individual items
        rc = p.add_task(task)
        assert p.num_tasks() == 1 + null_tasks
        st = time.time()
        print("read(1) * %i" % ni)
        for i in range(ni):
            items = rc.read(1)
            assert len(items) == 1

            # can't assert order in async mode
            if not async_mode:
                assert i == items[0]
        # END for each item
        elapsed = time.time() - st
        sys.stderr.write(
            "Threadpool: processed %i individual items, with %i threads, one at a time, in %f s ( %f items / s )\n"
            % (ni, p.size(), elapsed, ni / elapsed))

        # it couldn't yet notice that the input is depleted, as we pulled exactly
        # ni items - the next one would remove it. Instead, we delete our channel
        # which triggers orphan handling
        assert not task.is_done()
        assert p.num_tasks() == 1 + null_tasks
        del rc
        assert p.num_tasks() == null_tasks

        # test min count
        # if we query 1 item, it will prepare ni / 2
        task = make_task()
        task.min_count = ni // 2
        rc = p.add_task(task)
        print("read(1)")
        items = rc.read(1)
        assert len(items) == 1 and items[0] == 0  # processes ni / 2
        print("read(1)")
        items = rc.read(1)
        assert len(items) == 1 and items[0] == 1  # processes nothing
        # the rest: it has ni/2 - 2 items on the queue, and we pull ni - 2.
        # We want more than is available, so the task realizes it's done. The task
        # doesn't care about the items in its output channel
        nri = ni - 2
        print("read(%i)" % nri)
        items = rc.read(nri)
        assert len(items) == nri
        p.remove_task(task)
        assert p.num_tasks() == null_tasks
        task._assert(2, ni)  # two chunks, ni calls

        # it's already done and gives us no more, but it's still okay to use, as
        # a task doesn't have to be in the graph to allow reading the items it
        # already produced
        print("read(0) on closed")
        # a thread may close the channel a tiny fraction of time after we check it,
        # making the test fail even though the channel is nearly closed.
        # When we start reading, we should wake up once it sends its signal
        # assert task.is_closed()
        assert len(rc.read()) == 0

        # test chunking
        # we always want 4 chunks, these could go to individual nodes
        task = make_task()
        task.min_count = ni // 2  # restore previous value
        task.max_chunksize = ni // 4  # 4 chunks
        rc = p.add_task(task)

        # must read a specific item count
        # count is still at ni / 2 - here we want more than that
        # two chunks of ni/4 items, plus part of a third chunk to cover the extra 2
        nri = ni // 2 + 2
        print("read(%i) chunksize set" % nri)
        items = rc.read(nri)
        assert len(items) == nri
        # we have ni/4 - 2 items on the queue; we want ni/4 from the next chunk,
        # causing one processing pass (4 in total). We still want ni/4 - 2 from yet
        # another chunk, causing another processing pass
        nri = ni // 2 - 2
        print("read(%i) chunksize set" % nri)
        items = rc.read(nri)
        assert len(items) == nri

        task._assert(5, ni)

        # delete the handle first, causing the task to be removed and marked done.
        # We check for the done state later. Depending on the timing, the task may
        # not yet be marked done when we check it, because we could be scheduled in
        # before the flag is set.
        del rc
        assert task.is_done()
        assert p.num_tasks() == null_tasks  # depleted

        # but this only hits if we want too many items; if we want fewer, it could
        # still do too much - hence we set the min_count to the same number to enforce
        # at least ni / 4 items to be processed, no matter what we request
        task = make_task()
        task.min_count = None
        task.max_chunksize = ni // 4  # match previous setup
        rc = p.add_task(task)
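        # with min_count disabled, each read(1) below should trigger its own processing
        # pass - the task._assert(ni, ni) check further down relies on exactly that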
        st = time.time()
        print("read(1) * %i, chunksize set" % ni)
        for i in range(ni):
            if async_mode:
                assert len(rc.read(1)) == 1
            else:
                assert rc.read(1)[0] == i
            # END handle async mode
        # END pull individual items
        # too many processing counts ;)
        elapsed = time.time() - st
        sys.stderr.write(
            "Threadpool: processed %i individual items in chunks of %i, with %i threads, one at a time, in %f s ( %f items / s)\n"
            % (ni, ni // 4, p.size(), elapsed, ni / elapsed))

        task._assert(ni, ni)
        assert p.num_tasks() == 1 + null_tasks
        assert p.remove_task(task) is p  # del manually this time
        assert p.num_tasks() == null_tasks

        # now we set the minimum count to reduce the number of processing passes
        task = make_task()
        task.min_count = ni // 4
        task.max_chunksize = ni // 4  # match previous setup
        rc = p.add_task(task)
        print("read(1) * %i, min_count%i + chunksize" % (ni, task.min_count))
        for i in range(ni):
            items = rc.read(1)
            assert len(items) == 1
            if not async_mode:
                assert items[0] == i
        # END for each item
        task._assert(ni // task.min_count, ni)
        del rc
        assert p.num_tasks() == null_tasks

        # test failure
        # on failure, the processing stops and the task is finished, keeping
        # its error for later
        task = make_task()
        task.should_fail = True
        rc = p.add_task(task)
        print("read(0) with failure")
        assert len(rc.read()) == 0  # failure on first item

        assert isinstance(task.error(), AssertionError)
        assert task.is_done()  # on error, it's marked done as well
        del rc
        assert p.num_tasks() == null_tasks

        # test failure after ni / 2 items
        # This makes sure it correctly closes the channel on failure to prevent blocking
        nri = ni // 2
        task = make_task(TestFailureThreadTask, fail_after=ni // 2)
        rc = p.add_task(task)
        assert len(rc.read()) == nri
        assert task.is_done()
        assert isinstance(task.error(), AssertionError)

        sys.stderr.write("done with everything\n")