Example #1
def fork_analysis(slices, analysis_func, kw, preserve_result, output_fds):
	from multiprocessing import Process, Queue
	from queue import Empty as QueueEmpty
	from time import time
	q = Queue()
	children = []
	t = time()
	pid = os.getpid()
	for i in range(slices):
		p = Process(target=call_analysis, args=(analysis_func, i, q, preserve_result, pid, output_fds), kwargs=kw, name='analysis-%d' % (i,))
		p.start()
		children.append(p)
	for fd in output_fds:
		os.close(fd)
	per_slice = []
	temp_files = {}
	no_children_no_messages = False
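	# Collect exactly one message per slice:
	# (slice number, finish time, temp files, datasetwriter lens, datasetwriter minmax, traceback)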
	while len(per_slice) < slices:
		still_alive = []
		for p in children:
			if p.is_alive():
				still_alive.append(p)
			else:
				p.join()
				if p.exitcode:
					raise Exception("%s terminated with exitcode %d" % (p.name, p.exitcode,))
		children = still_alive
		# If a process dies badly we may never get a message here.
		# No need to handle that very quickly though, 10 seconds is fine.
		# (Typically this is caused by running out of memory.)
		try:
			s_no, s_t, s_temp_files, s_dw_lens, s_dw_minmax, s_tb = q.get(timeout=10)
		except QueueEmpty:
			if not children:
				# No children left, so they must have all sent their messages.
				# Still, just to be sure there isn't a race, wait one iteration more.
				if no_children_no_messages:
					raise Exception("All analysis processes exited cleanly, but not all returned a result.")
				else:
					no_children_no_messages = True
			continue
		if s_tb:
			data = [{'analysis(%d)' % (s_no,): s_tb}, None]
			os.write(_prof_fd, json.dumps(data).encode('utf-8'))
			exitfunction()
		per_slice.append((s_no, s_t))
		temp_files.update(s_temp_files)
		for name, lens in s_dw_lens.items():
			dataset._datasetwriters[name]._lens.update(lens)
		for name, minmax in s_dw_minmax.items():
			dataset._datasetwriters[name]._minmax.update(minmax)
	g.update_top_status("Waiting for all slices to finish cleanup")
	for p in children:
		p.join()
	if preserve_result:
		res_seq = ResultIterMagic(slices, reuse_msg="analysis_res is an iterator, don't re-use it")
	else:
		res_seq = None
	return [v - t for k, v in sorted(per_slice)], temp_files, res_seq
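The loop above unpacks a six-field message per slice. As a rough, hypothetical sketch of the child side (not the project's actual call_analysis), the reporting end could look like this, assuming analysis_func accepts the slice number as a keyword argument:

import traceback
from time import time

def call_analysis_sketch(analysis_func, sliceno, q, preserve_result, parent_pid, output_fds, **kw):
	# Hypothetical child side: run one slice, then send the parent exactly one
	# message in the six-field layout the collection loop above unpacks.
	# (preserve_result, parent_pid and output_fds are accepted but unused here.)
	tb = None
	temp_files = {}   # files this slice created and wants registered
	dw_lens = {}      # per-datasetwriter line counts written by this slice
	dw_minmax = {}    # per-datasetwriter min/max column stats for this slice
	try:
		analysis_func(sliceno=sliceno, **kw)
	except Exception:
		# A non-None traceback makes the parent dump profiling data and exit.
		tb = traceback.format_exc()
	q.put((sliceno, time(), temp_files, dw_lens, dw_minmax, tb))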
Example #2
def fork_analysis(slices, concurrency, analysis_func, kw, preserve_result,
                  output_fds, q):
    from queue import Empty as QueueEmpty
    from time import monotonic
    import gc
    children = []
    t = monotonic()
    pid = os.getpid()
    if hasattr(gc, 'freeze'):
        # See https://bugs.python.org/issue31558
        # (Though we keep the gc disabled by default.)
        gc.freeze()
    delayed_start = False
    delayed_start_todo = 0
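    # With a concurrency limit, slices beyond it get a pipe to wait on;
    # the parent releases them one at a time as earlier slices report results.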
    for i in range(slices):
        if i == concurrency:
            assert concurrency != 0
            # The rest will wait on this pipe
            delayed_start = os.pipe()
            delayed_start_todo = slices - i
        p = SimplifiedProcess(target=call_analysis,
                              args=(analysis_func, i, delayed_start, q,
                                    preserve_result, pid, output_fds),
                              kwargs=kw,
                              name='analysis-%d' % (i, ))
        children.append(p)
    for fd in output_fds:
        os.close(fd)
    if delayed_start:
        os.close(delayed_start[0])
    q.make_reader()
    per_slice = []
    temp_files = {}
    no_children_no_messages = False
    reap_time = monotonic() + 5
    exit_count = 0
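    # Collect one result message per slice; reap dead children every 5 seconds,
    # or right away once iowrapper reports an exit via an empty message.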
    while len(per_slice) < slices:
        if exit_count > 0 or reap_time <= monotonic():
            still_alive = []
            for p in children:
                if p.is_alive():
                    still_alive.append(p)
                else:
                    exit_count -= 1
                    if p.exitcode:
                        raise AcceleratorError(
                            "%s terminated with exitcode %d" % (
                                p.name,
                                p.exitcode,
                            ))
            children = still_alive
            reap_time = monotonic() + 5
        # If a process dies badly we may never get a message here.
        # (iowrapper tries to tell us though.)
        # No need to handle that very quickly though, 10 seconds is fine.
        # (Typically this is caused by running out of memory.)
        try:
            msg = q.get(timeout=10)
            if not msg:
                # Notification from iowrapper, so we wake up (quickly) even if
                # the process died badly (e.g. from running out of memory).
                exit_count += 1
                continue
            s_no, s_t, s_temp_files, s_dw_lens, s_dw_minmax, s_dw_compressions, s_tb = msg
        except QueueEmpty:
            if not children:
                # No children left, so they must have all sent their messages.
                # Still, just to be sure there isn't a race, wait one iteration more.
                if no_children_no_messages:
                    raise AcceleratorError(
                        "All analysis processes exited cleanly, but not all returned a result."
                    )
                else:
                    no_children_no_messages = True
            continue
        if s_tb:
            data = [{'analysis(%d)' % (s_no, ): s_tb}, None]
            writeall(_prof_fd, json.dumps(data).encode('utf-8'))
            exitfunction()
        if delayed_start_todo:
            # Another analysis is allowed to run now
            os.write(delayed_start[1], b'a')
            delayed_start_todo -= 1
        per_slice.append((s_no, s_t))
        temp_files.update(s_temp_files)
        for name, lens in s_dw_lens.items():
            dataset._datasetwriters[name]._lens.update(lens)
        for name, minmax in s_dw_minmax.items():
            dataset._datasetwriters[name]._minmax.update(minmax)
        for name, compressions in s_dw_compressions.items():
            dataset._datasetwriters[name]._compressions.update(compressions)
    g.update_top_status("Waiting for all slices to finish cleanup")
    q.close()
    if delayed_start:
        os.close(delayed_start[1])
    for p in children:
        p.join()
    if preserve_result:
        res_seq = ResultIterMagic(
            slices, reuse_msg="analysis_res is an iterator, don't re-use it")
    else:
        res_seq = None
    return [v - t for k, v in sorted(per_slice)], temp_files, res_seq
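The delayed-start pipe in Example #2 can be shown in isolation. This is a minimal, self-contained sketch with assumed semantics and hypothetical names (not the project's code); like the function above, it relies on the fork start method so the pipe fds are inherited by the children:

import os
import multiprocessing as mp

def worker(i, concurrency, delayed_start_r, q):
    # Slices beyond the concurrency limit block here until the parent
    # writes one release byte on the pipe.
    if i >= concurrency:
        os.read(delayed_start_r, 1)
    # ... the per-slice work would go here ...
    q.put(i)  # one "done" message per slice, like the real result messages

def run(slices=8, concurrency=3):
    q = mp.Queue()
    delayed_start_r, delayed_start_w = os.pipe()
    children = [
        mp.Process(target=worker, args=(i, concurrency, delayed_start_r, q))
        for i in range(slices)
    ]
    for p in children:
        p.start()
    os.close(delayed_start_r)  # the parent only writes release bytes
    todo = max(slices - concurrency, 0)
    for _ in range(slices):
        q.get()  # a slice finished
        if todo:
            os.write(delayed_start_w, b'a')  # release one more waiting slice
            todo -= 1
    os.close(delayed_start_w)
    for p in children:
        p.join()

if __name__ == '__main__':
    mp.set_start_method('fork')  # pipe fds must be inherited by the children
    run()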