Example #1
# Stdlib imports used below; JLock and BaseWebHandler are assumed to come from
# elsewhere in the daemon module (project-specific, not shown here).
import os
import random
import signal
import sys
import time
from string import ascii_letters
from threading import Thread, Lock as TLock

from compat import unicode
from extras import json_encode, json_decode, DotDict
from dispatch import JobError
from status import statmsg_sink, children, print_status_stacks, status_stacks_export



DEBUG_WRITE_JSON = False


def gen_cookie(size=16):
	return ''.join(random.choice(ascii_letters) for _ in range(size))

# This contains cookie: {lock, last_error, last_time} for all jobs, main jobs have cookie None.
job_tracking = {None: DotDict(lock=JLock(), last_error=None, last_time=0)}


# This needs .ctrl to work. It is set from main()
class XtdHandler(BaseWebHandler):
	server_version = "scx/0.1"
	DEBUG =  not True

	def log_message(self, format, *args):
		return

	def encode_body(self, body):
		if isinstance(body, bytes):
			return body
		if isinstance(body, unicode):
			return body.encode('utf-8')
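
job_tracking maps a subjob cookie (None for top-level jobs) to a per-job DotDict holding the lock, last_error and last_time. Example #2 below pairs gen_cookie() with this dict, retrying until it draws a cookie that is not already a key. That allocation step, isolated into a plain-dict sketch (no JLock, for brevity):

import random
from string import ascii_letters

def gen_cookie(size=16):
    return ''.join(random.choice(ascii_letters) for _ in range(size))

def new_tracking_entry(tracking):
    # Allocate an unused cookie and register a fresh record for it.
    # Example #2 runs this retry loop while holding the parent job's lock;
    # None is a real key in the dict, so the loop always runs at least once.
    cookie = None
    while cookie in tracking:
        cookie = gen_cookie()
    tracking[cookie] = {'last_error': None, 'last_time': 0}
    return cookie

tracking = {None: {'last_error': None, 'last_time': 0}}
print(new_tracking_entry(tracking) in tracking)  # True
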
Example #2
	def _handle_req(self, path, args):
		if path[0] == 'status':
			data = job_tracking.get(args.get('subjob_cookie') or None)
			if not data:
				self.do_response(500, 'text/plain', 'bad subjob_cookie!\n' )
				return
			timeout = min(float(args.get('timeout', 0)), 128)
			status = DotDict(idle=data.lock.acquire(False))
			deadline = time.time() + timeout
			while not status.idle and time.time() < deadline:
				time.sleep(0.1)
				status.idle = data.lock.acquire(False)
			if status.idle:
				if data.last_error:
					status.last_error = data.last_error
					data.last_error = None
				else:
					status.last_time = data.last_time
				data.lock.release()
			elif path == ['status', 'full']:
				status.status_stacks, status.current = status_stacks_export()
			self.do_response(200, "text/json", status)
			return

		elif path==['list_workspaces']:
			ws = {k: v.path for k, v in self.ctrl.list_workspaces().items()}
			self.do_response(200, "text/json", ws)

		elif path==['config']:
			self.do_response(200, "text/json", self.ctrl.config)

		elif path==['update_methods']:
			self.do_response(200, "text/json", self.ctrl.update_methods())

		elif path==['methods']:
			""" return a json with everything the Method object knows about the methods """
			self.do_response(200, "text/json", self.ctrl.get_methods())

		elif path[0]=='method_info':
			method = path[1]
			self.do_response(200, "text/json", self.ctrl.method_info(method))

		elif path[0]=='set_workspace':
			_ws = path[1]
			if _ws not in self.ctrl.list_workspaces():
				self.do_response(500,'text/plain', 'Undefined workspace \"%s\"\n' % _ws)
			else:
				self.ctrl.set_workspace(_ws)
				self.do_response(200,'text/plain', 'Workspace set to \"%s\"\n' % _ws)

		elif path[0]=='workspace_info':
			self.do_response(200, 'text/json', self.ctrl.get_workspace_details())

		elif path[0] == 'abort':
			tokill = list(children)
			print('Force abort', tokill)
			for child in tokill:
				os.killpg(child, signal.SIGKILL)
			self.do_response(200, 'text/json', {'killed': len(tokill)})

		elif path==['submit']:
			if self.ctrl.broken:
				self.do_response(500, "text/json", {'broken': self.ctrl.broken, 'error': 'Broken methods: ' + ', '.join(sorted(m.split('.')[-1][2:] for m in self.ctrl.broken))})
			elif 'xml' in args:
				self.do_response(500, 'text/plain', 'JSON > XML!\n' )
			elif 'json' in args:
				if DEBUG_WRITE_JSON:
					with open('DEBUG_WRITE.json', 'wb') as fh:
						fh.write(args['json'])
				setup = json_decode(args['json'])
				data = job_tracking.get(setup.get('subjob_cookie') or None)
				if not data:
					self.do_response(500, 'text/plain', 'bad subjob_cookie!\n' )
					return
				if len(job_tracking) - 1 > 5: # max five levels
					print('Too deep subjob nesting!')
					self.do_response(500, 'text/plain', 'Too deep subjob nesting')
					return
				if data.lock.acquire(False):
					respond_after = True
					try:
						if self.DEBUG:  print('@daemon.py:  Got the lock!', file=sys.stderr)
						jobidv, job_res = self.ctrl.initialise_jobs(setup)
						job_res['done'] = False
						if jobidv:
							error = []
							tlock = TLock()
							link2job = {j['link']: j for j in job_res['jobs'].values()}
							def run(jobidv, tlock):
								for jobid in jobidv:
									passed_cookie = None
									# This is not a race - all higher locks are locked too.
									while passed_cookie in job_tracking:
										passed_cookie = gen_cookie()
									job_tracking[passed_cookie] = DotDict(lock=JLock(), last_error=None, last_time=0)
									try:
										self.ctrl.run_job(jobid, subjob_cookie=passed_cookie, parent_pid=setup.get('parent_pid', 0))
										# update database since a new jobid was just created
										job = self.ctrl.add_single_jobid(jobid)
										with tlock:
											link2job[jobid]['make'] = 'DONE'
											link2job[jobid]['total_time'] = job.total
									except JobError as e:
										error.append([e.jobid, e.method, e.status])
										with tlock:
											link2job[jobid]['make'] = 'FAIL'
										return
									finally:
										del job_tracking[passed_cookie]
								# everything was built ok, update symlink
								try:
									wn = self.ctrl.current_workspace
									dn = self.ctrl.workspaces[wn].path
									ln = os.path.join(dn, wn + "-LATEST_")
									try:
										os.unlink(ln)
									except OSError:
										pass
									os.symlink(jobid, ln)
									os.rename(ln, os.path.join(dn, wn + "-LATEST"))
								except Exception:
									pass # meh
							t = Thread(target=run, name="job runner", args=(jobidv, tlock,))
							t.daemon = True
							t.start()
							t.join(2) # give job two seconds to complete
							with tlock:
								for j in link2job.values():
									if j['make'] in (True, 'FAIL',):
										respond_after = False
										job_res_json = json_encode(job_res)
										break
							if not respond_after: # not all jobs are done yet, give partial response
								self.do_response(200, "text/json", job_res_json)
							t.join() # wait until actually complete
							del tlock
							del t
							# verify that all jobs got built.
							total_time = 0
							for j in link2job.values():
								jobid = j['link']
								if j['make'] == True:
									# Well, crap.
									error.append([jobid, "unknown", {"INTERNAL": "Not built"}])
									print("INTERNAL ERROR IN JOB BUILDING!", file=sys.stderr)
								total_time += j.get('total_time', 0)
							data.last_error = error
							data.last_time = total_time
					except Exception as e:
						if respond_after:
							self.do_response(500, "text/json", {'error': str(e)})
						raise
					finally:
						data.lock.release()
					if respond_after:
						job_res['done'] = True
						self.do_response(200, "text/json", job_res)
					if self.DEBUG:  print("@daemon.py:  Process releases lock!", file=sys.stderr) # note: has already done http response
				else:
					self.do_response(200, 'text/plain', 'Busy doing work for you...\n')
			else:
				self.do_response(500, 'text/plain', 'Missing json input!\n' )
		else:
			self.do_response(500, 'text/plain', 'Unknown path\n' )
			return
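
The 'status' branch tries a non-blocking lock.acquire(False) in a loop until either the lock is free or the client-supplied timeout (capped at 128 seconds) runs out. The same pattern, reduced to a standalone helper and using a plain threading.Lock in place of the project's JLock (an assumption made only for this sketch):

import time
from threading import Lock

def wait_for_idle(lock, timeout, poll_interval=0.1):
    # Non-blocking acquire, retried until the deadline passes.
    # Returns True if the caller now holds the lock, False otherwise.
    deadline = time.time() + min(float(timeout), 128)  # cap like the handler does
    idle = lock.acquire(False)
    while not idle and time.time() < deadline:
        time.sleep(poll_interval)
        idle = lock.acquire(False)
    return idle

job_lock = Lock()
if wait_for_idle(job_lock, timeout=2):
    try:
        pass  # safe to read/clear last_error and last_time here
    finally:
        job_lock.release()
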
Example #3
def as_dep(self):
    return DotDict(timestamp=self.timestamp,
                   joblist=self.joblist,
                   caption=self.caption,
                   _default=lambda: None)
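
as_dep() returns a DotDict whose missing keys fall back to the _default factory, so callers can read optional fields as attributes without guarding against KeyError. extras.DotDict itself is project-specific; a minimal sketch of the behaviour these examples rely on (attribute access mapping to keys, plus an optional _default callable) might look like this:

class DotDictSketch(dict):
    # Minimal stand-in for extras.DotDict: keys double as attributes, and an
    # optional _default callable supplies values for missing keys.
    # Illustration only, not the project's actual implementation.
    def __init__(self, *a, **kw):
        factory = kw.pop('_default', None)
        object.__setattr__(self, '_default_factory', factory)
        dict.__init__(self, *a, **kw)

    def __getattr__(self, name):
        if name.startswith('_'):
            raise AttributeError(name)
        if name in self:
            return self[name]
        if self._default_factory is not None:
            return self._default_factory()
        raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

dep = DotDictSketch(timestamp='2024-01-01', joblist=['job-0'], _default=lambda: None)
print(dep.timestamp, dep.caption)  # 2024-01-01 None
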
Example #4
def synthesis(params, analysis_res, prepare_res):
    r = report()
    res = DotDict()
    d = datasets.source
    analysis_res = list(analysis_res)
    if options.filter_bad:
        num_lines_per_split = [
            num - data[1] for num, data in zip(d.lines, analysis_res)
        ]
        res.bad_line_count_per_slice = [data[1] for data in analysis_res]
        res.bad_line_count_total = sum(res.bad_line_count_per_slice)
        r.println('Slice   Bad line count')
        for sliceno, cnt in enumerate(res.bad_line_count_per_slice):
            r.println('%5d   %d' % (
                sliceno,
                cnt,
            ))
        r.println('total   %d' % (res.bad_line_count_total, ))
        r.line()
        r.println('Slice   Bad line number')
        reported_count = 0
        for sliceno, data in enumerate(analysis_res):
            fn = 'badmap%d' % (sliceno, )
            if data[1] and reported_count < 32:
                with open(fn, 'rb') as fh:
                    badmap = mmap(fh.fileno(), 0, prot=PROT_READ)
                    for ix, v in enumerate(imap(ord, badmap)):
                        if v:
                            for jx in range(8):
                                if v & (1 << jx):
                                    r.println('%5d   %d' % (
                                        sliceno,
                                        ix * 8 + jx,
                                    ))
                                    reported_count += 1
                                    if reported_count >= 32: break
                            if reported_count >= 32: break
                    badmap.close()
            unlink(fn)
        if reported_count >= 32:
            r.println('...')
        r.line()
        res.bad_line_count_per_column = {}
        r.println('Bad line count   Column')
        for colname in sorted(analysis_res[0][0]):
            cnt = sum(data[0][colname] for data in analysis_res)
            r.println('%14d   %s' % (
                cnt,
                colname,
            ))
            res.bad_line_count_per_column[colname] = cnt
        r.line()
    else:
        num_lines_per_split = d.lines
    dw = prepare_res
    for sliceno, count in enumerate(num_lines_per_split):
        dw.set_lines(sliceno, count)
    if options.defaults:
        r.println('Defaulted values')
        res.defaulted_per_slice = {}
        res.defaulted_total = {}
        for colname in sorted(options.defaults):
            r.println('    %s:' % (colname, ))
            r.println('        Slice   Defaulted line count')
            res.defaulted_per_slice[colname] = [
                data[2][colname] for data in analysis_res
            ]
            res.defaulted_total[colname] = sum(
                res.defaulted_per_slice[colname])
            for sliceno, cnt in enumerate(res.defaulted_per_slice[colname]):
                r.println('        %5d   %d' % (
                    sliceno,
                    cnt,
                ))
            r.println('        total   %d' % (res.defaulted_total[colname], ))
        r.line()
    for sliceno, data in enumerate(analysis_res):
        dw.set_minmax(sliceno, data[3])
    d = dw.finish()
    res.good_line_count_per_slice = num_lines_per_split
    res.good_line_count_total = sum(num_lines_per_split)
    r.line()
    r.println('Total of %d lines converted' % (res.good_line_count_total, ))
    r.close()
    json_save(res)
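
The badmap files read above are plain bitmaps: bit (ix * 8 + jx) set means line ix * 8 + jx of that slice was bad, which is why the report walks bytes and then bits. The imap(ord, badmap) spelling is for Python 2, where iterating a buffer yields one-byte strings; in Python 3 iterating bytes already yields integers, so the same decoding can be written as:

def bad_line_numbers(badmap_bytes):
    # Decode a badmap bitmap: yield the index of every set bit,
    # i.e. the line numbers that were flagged as bad.
    for ix, v in enumerate(badmap_bytes):
        if v:
            for jx in range(8):
                if v & (1 << jx):
                    yield ix * 8 + jx

print(list(bad_line_numbers(bytes([0b00000101, 0, 0b10000000]))))  # [0, 2, 23]
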
Example #5
def real_synthesis(params,
                   options,
                   datasets,
                   minmax_index,
                   prepare_res,
                   we_have_spill,
                   save_discard=False):
    stats = DotDict(
        included_lines=[0] * params.slices,
        discarded_lines=[0] * params.slices,
        spilled_lines=[0] * params.slices,
        virtually_spilled_lines=[0] * params.slices,
        split_date=str(options.split_date) if options.split_date else None,
        discard_before_date=str(options.discard_before_date)
        if options.discard_before_date else None,
    )
    minmax_per_slice = [{} for _ in range(params.slices)]

    def update_stats(data):
        for item in data.itervalues():
            stats.included_lines[sliceno] += item.counters[2]
            stats.discarded_lines[sliceno] += item.counters[1]
            if item.virtual_spill:
                stats.virtually_spilled_lines[sliceno] += item.counters[3]
            else:
                stats.spilled_lines[sliceno] += item.counters[3]
            update_minmax(minmax_per_slice[sliceno], item.minmax)

    def update_minmax(dest, src):
        for name, lst0 in src.iteritems():
            lst1 = dest.get(name, lst0)
            mins = map(min, zip(lst0[:3], lst1[:3]))
            maxs = map(max, zip(lst0[3:], lst1[3:]))
            dest[name] = mins + maxs

    for sliceno in range(params.slices):
        update_stats(blob.load('stats', sliceno=sliceno))
    minmax = {}
    for item in minmax_per_slice:
        update_minmax(minmax, item)

    def minmax_select(offset, stringify=False):
        d = {}
        for k, v in minmax.iteritems():
            mn = v[offset]
            mx = v[3 + offset]
            if mn <= mx:
                if stringify and isinstance(mn, (
                        date,
                        time,
                )):
                    d[k] = [str(mn), str(mx)]
                else:
                    d[k] = [mn, mx]
        return d

    dw, dw_spill = prepare_res[:2]
    dw.set_minmax(None, minmax_select(minmax_index))
    dw_spill.set_minmax(None, minmax_select(2))
    if save_discard:
        included_lines = stats.discarded_lines
    else:
        included_lines = stats.included_lines
    for sliceno in range(params.slices):
        dw.set_lines(sliceno, included_lines[sliceno])
        dw_spill.set_lines(sliceno, stats.spilled_lines[sliceno])
    if not we_have_spill:
        dw_spill.discard()
    stats.minmax_discarded = minmax_select(0, True)
    stats.minmax = minmax_select(1, True)
    stats.minmax_spilled = minmax_select(2, True)
    json_save(stats)
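
Each per-slice minmax entry is a six-element list: three minimums followed by three maximums, one pair per line class (offsets 0, 1 and 2 correspond to the discarded, kept and spilled selections made at the bottom of the function). update_minmax merges two such lists element-wise, but it leans on Python 2's map() returning lists; a Python-3-safe version of the same merge:

def merge_minmax(dest, src):
    # Element-wise min over the first three slots, element-wise max over the
    # last three (same layout as update_minmax above, written so it also
    # works on Python 3, where map() returns an iterator).
    for name, lst0 in src.items():
        lst1 = dest.get(name, lst0)
        mins = [min(a, b) for a, b in zip(lst0[:3], lst1[:3])]
        maxs = [max(a, b) for a, b in zip(lst0[3:], lst1[3:])]
        dest[name] = mins + maxs

total = {}
for per_slice in ({'ts': [1, 2, 3, 9, 9, 9]}, {'ts': [0, 5, 1, 7, 12, 2]}):
    merge_minmax(total, per_slice)
print(total)  # {'ts': [0, 2, 1, 9, 12, 9]}
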
Example #6
def process_one(sliceno,
                options,
                source,
                prepare_res,
                data=None,
                save_discard=False):
    # Future improvement: Look at the old minmax to determine if we will get anything from reading this data
    dw, dw_spill, column_names, column_sizes, column_types, minmax_typeidx = prepare_res
    if data:
        assert data.version == 1
        data.seen_before = True
    else:
        data = empty_spilldata()
    d = Dataset(source, data.spill_ds)
    in_files = []
    out_files = []
    offsets = []
    if not save_discard:
        out_files += [ffi.NULL] * len(
            column_names)  # don't save "too old" lines
    minmax_files = []
    minmax_d = {}
    for colname in column_names:
        out_fn = dw.column_filename(colname, sliceno).encode('ascii')
        in_fn = d.column_filename(colname, sliceno).encode('ascii')
        offset = d.columns[colname].offsets[sliceno] if d.columns[
            colname].offsets else 0
        in_files.append(ffi.new('char []', in_fn))
        out_files.append(ffi.new('char []', out_fn))
        offsets.append(offset)
        minmax_fn = out_fn + '_minmax'
        minmax_files.append(ffi.new('char []', minmax_fn))
        minmax_d[colname] = minmax_fn
    if save_discard:
        out_files += [ffi.NULL] * len(
            column_names)  # don't save "good" lines (save discard instead)
    date_coltype = column_types[options.date_column]

    def date2cfmt(dt):
        if date_coltype == 'datetime':
            date0 = (dt.year << 14) | (dt.month << 10) | (
                dt.day << 5) | dt.hour
            date1 = (dt.minute << 26) | (dt.second << 20) | dt.microsecond
        elif date_coltype == 'date':
            date0 = (dt.year << 9) | (dt.month << 5) | dt.day
            date1 = 0
        elif date_coltype == 'time':
            date0 = 32277536 | dt.hour
            date1 = (dt.minute << 26) | (dt.second << 20) | dt.microsecond
        else:
            raise Exception('Bad date_coltype type: ' + date_coltype)
        return date0, date1

    dates = [0, 0, 0, 0, 0xffffffff, 0xffffffff]
    stats = DotDict()
    if data.seen_before:
        dates[0:2] = date2cfmt(data.get('process_date', datetime.min))
    if (data.last_time or options.hard_spill) and not save_discard:
        for colname in column_names:
            out_fn = dw_spill.column_filename(colname, sliceno).encode('ascii')
            out_files.append(ffi.new('char []', out_fn))
        stats.virtual_spill = False
    else:
        # We still have to make sure the files exist, or we end up
        # with a broken dataset if only some slices wanted to spill.
        for colname in column_names:
            open(dw_spill.column_filename(colname, sliceno), 'ab').close()
        out_files += [ffi.NULL] * len(column_names)
        stats.virtual_spill = True
    # We are done reading `data` - update it for next iteration
    del data.seen_before
    data.process_date = datetime.min
    if options.discard_before_date:
        if options.split_date:
            assert options.discard_before_date < options.split_date
        dates[2:4] = date2cfmt(options.discard_before_date)  # slots 2-3: discard threshold (cf. split date in slots 4-5)
        data.process_date = options.discard_before_date
    if options.split_date:
        dates[4:6] = date2cfmt(options.split_date)
        data.process_date = max(data.process_date, options.split_date)
    counters = ffi.new('uint64_t [4]')  # one for each class-enum
    res = backend.filter(len(in_files), in_files, offsets, out_files,
                         minmax_files, column_sizes, counters, dates,
                         minmax_typeidx, d.lines[sliceno])
    assert not res, "cffi converter returned error on data from " + source
    stats.version = 0
    stats.counters = list(counters)
    stats.minmax = {}
    for colname, fn in minmax_d.iteritems():
        if exists(fn):
            with type2iter[column_types[colname]](fn) as it:
                stats.minmax[colname] = list(it)
            unlink(fn)
    # If there is at most 2% left, spill it next time.
    # Or if there is at most 10% left and we have read it at least 8 times.
    # Or if there is at most 20% left and we have read it at least 16 times.
    # A reasonable balance between re-reading and re-writing, one hopes.
    data.counter += 1
    total_lines = sum(counters)
    data.last_time = (counters[3] <= total_lines / 50 or
                      (data.counter >= 8 and counters[3] <= total_lines / 10)
                      or
                      (data.counter >= 16 and counters[3] <= total_lines / 5))
    # If no lines were spilled we will not need this dataset again,
    # nor if we wrote the spill in this dataset.
    if not counters[3] or not stats.virtual_spill:
        data = None
    return data, stats
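
date2cfmt bit-packs the date column into one or two unsigned integers (year/month/day/hour in date0, minute/second/microsecond in date1), presumably so the C filter in backend.filter can compare each line against the dates[] thresholds as plain integer comparisons. The field widths are chosen so that chronological order and numeric order agree, which a small check makes visible for the 'date' case:

from datetime import date

def pack_date(dt):
    # The 'date' branch of date2cfmt above: 5 bits for day, 4 bits for month,
    # the rest for year, so later dates always pack to bigger integers.
    return (dt.year << 9) | (dt.month << 5) | dt.day

a, b = date(2016, 12, 31), date(2017, 1, 1)
assert pack_date(a) < pack_date(b)
print(pack_date(a), pack_date(b))  # 1032607 1032737
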
Example #7
def statmsg_sink(logfilename, sock):
	from extras import DotDict
	print('Logging to "%s".' % (logfilename,))
	with open(logfilename, 'w', encoding='utf-8') as fh:
		ix = 0
		while True:
			data = None
			try:
				data = sock.recv(1500)
				typ, pid, msg = data.decode('utf-8').split('\0', 2)
				pid = int(pid)
				with status_stacks_lock:
					if typ == 'push':
						msg, t, cookie = msg.split('\0', 3)
						t = float(t)
						status_all[pid].stack.append((msg, t, cookie))
					elif typ == 'pop':
						stack, ix = _find(pid, msg)
						if ix == len(stack) - 1:
							stack.pop()
						else:
							print('POP OF WRONG STATUS: %d:%s (index %s of %d)' % (pid, msg, ix, len(stack)))
					elif typ == 'update':
						msg, _, cookie = msg.split('\0', 3)
						stack, ix = _find(pid, cookie)
						if ix is None:
							print('UPDATE TO UNKNOWN STATUS %d:%s: %s' % (pid, cookie, msg))
						else:
							stack[ix] = (msg, stack[ix][1], cookie)
					elif typ == 'start':
						parent_pid, is_analysis, msg, t = msg.split('\0', 3)
						parent_pid = int(parent_pid)
						t = float(t)
						d = DotDict(_default=None)
						d.parent_pid = parent_pid
						d.children   = {}
						d.stack      = [(msg, t, None)]
						d.summary    = (t, msg, t,)
						if parent_pid in status_all:
							if is_analysis:
								msg, parent_t, _ = status_all[parent_pid].stack[0]
								d.summary = (parent_t, msg + ' analysis', t,)
							status_all[parent_pid].children[pid] = d
						else:
							status_tree[pid] = d
						status_all[pid] = d
						del d
					elif typ == 'end':
						d = status_all.get(pid)
						if d:
							if d.parent_pid in status_all:
								p = status_all[d.parent_pid]
								if pid in p.children:
									del p.children[pid]
								del p
							del d
						if pid in  status_tree:
							del status_tree[pid]
					elif typ == 'statmsg':
						fh.write('%s %5d: %s\n' % (strftime("%Y-%m-%d %H:%M:%S"), ix, msg,))
						fh.flush()
						ix += 1
					else:
						print('UNKNOWN MESSAGE: %r' % (data,))
			except Exception:
				print('Failed to process %r:' % (data,))
				print_exc()
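
The sink parses each datagram as NUL-separated fields: the message type and sender pid first, then type-specific fields (msg, timestamp and cookie for 'push'). The real sender lives elsewhere in the status module, so the following is only a sketch of the wire format that the split('\0') calls above imply, exercised over a local datagram socket pair:

import socket
import time

def send_push(sock, pid, msg, cookie=''):
    # 'push' datagram layout implied by the sink: typ \0 pid \0 msg \0 t \0 cookie
    payload = '\0'.join(('push', str(pid), msg, repr(time.time()), cookie))
    sock.send(payload.encode('utf-8'))

a, b = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
send_push(a, 4711, 'csvimport slice 3', cookie='abc')
typ, pid, rest = b.recv(1500).decode('utf-8').split('\0', 2)
msg, t, cookie = rest.split('\0', 3)
print(typ, int(pid), msg, float(t), cookie)
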
Example #8
def load_methods(data):
    res_warnings = []
    res_failed = []
    res_hashes = {}
    res_params = {}
    for package, key in data:
        filename = '%s/a_%s.py' % (
            package,
            key,
        )
        modname = '%s.a_%s' % (package, key)
        try:
            with open(filename, 'rb') as fh:
                src = fh.read()
            tar_fh = io.BytesIO()
            tar_o = tarfile.open(mode='w:gz', fileobj=tar_fh, compresslevel=1)
            tar_o.add(filename, arcname='a_%s.py' % (key, ))
            h = hashlib.sha1(src)
            hash = int(h.hexdigest(), 16)
            mod = import_module(modname)
            prefix = os.path.dirname(mod.__file__) + '/'
            likely_deps = set()
            for k in dir(mod):
                v = getattr(mod, k)
                if isinstance(v, ModuleType):
                    dep = getattr(v, '__file__', '')
                    if dep.startswith(prefix):
                        dep = os.path.basename(dep)
                        if dep[-4:] in (
                                '.pyc',
                                '.pyo',
                        ):
                            dep = dep[:-1]
                        likely_deps.add(dep)
            hash_extra = 0
            for dep in getattr(mod, 'depend_extra', ()):
                if isinstance(dep, ModuleType):
                    dep = dep.__file__
                    if dep[-4:] in (
                            '.pyc',
                            '.pyo',
                    ):
                        dep = dep[:-1]
                if isinstance(dep, str):
                    if not dep.startswith('/'):
                        dep = prefix + dep
                    with open(dep, 'rb') as fh:
                        hash_extra ^= int(
                            hashlib.sha1(fh.read()).hexdigest(), 16)
                    bn = os.path.basename(dep)
                    likely_deps.discard(bn)
                    tar_o.add(dep, arcname=bn)
                else:
                    raise Exception('Bad depend_extra in %s.a_%s: %r' % (
                        package,
                        key,
                        dep,
                    ))
            for dep in likely_deps:
                res_warnings.append(
                    '%s.a_%s should probably depend_extra on %s' % (
                        package,
                        key,
                        dep[:-3],
                    ))
            res_hashes[key] = ("%040x" % (hash ^ hash_extra, ), )
            res_params[key] = params = DotDict()
            for name, default in (
                ('options', {}),
                ('datasets', ()),
                ('jobids', ()),
            ):
                params[name] = getattr(mod, name, default)
            equivalent_hashes = getattr(mod, 'equivalent_hashes', ())
            if equivalent_hashes:
                assert isinstance(
                    equivalent_hashes,
                    dict), 'Read the docs about equivalent_hashes'
                assert len(equivalent_hashes
                           ) == 1, 'Read the docs about equivalent_hashes'
                k, v = next(iteritems(equivalent_hashes))
                assert isinstance(k,
                                  str), 'Read the docs about equivalent_hashes'
                assert isinstance(
                    v, tuple), 'Read the docs about equivalent_hashes'
                for v in v:
                    assert isinstance(
                        v, str), 'Read the docs about equivalent_hashes'
                start = src.index('equivalent_hashes')
                end = src.index('}', start)
                h = hashlib.sha1(src[:start])
                h.update(src[end:])
                verifier = "%040x" % (int(h.hexdigest(), 16) ^ hash_extra, )
                if verifier in equivalent_hashes:
                    res_hashes[key] += equivalent_hashes[verifier]
                else:
                    res_warnings.append(
                        '%s.a_%s has equivalent_hashes, but missing verifier %s'
                        % (
                            package,
                            key,
                            verifier,
                        ))
            tar_o.close()
            tar_fh.seek(0)
            archives[key] = tar_fh.read()
        except Exception:
            print_exc()
            res_failed.append(modname)
            continue
    return res_warnings, res_failed, res_hashes, res_params
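
The equivalent_hashes verifier is computed over the method's source with the equivalent_hashes literal itself cut out (everything from the name up to the closing brace), so adding another accepted hash to that dict does not change the verifier. Ignoring the hash_extra mixing for brevity, the slicing trick works like this:

import hashlib

def verifier_for(src):
    # Hash the source with the equivalent_hashes block excised, mirroring the
    # start/end slicing in load_methods (hash_extra omitted for brevity).
    start = src.index(b'equivalent_hashes')
    end = src.index(b'}', start)
    h = hashlib.sha1(src[:start])
    h.update(src[end:])
    return '%040x' % int(h.hexdigest(), 16)

old = b"x = 1\nequivalent_hashes = {'verifier': ('aaaa',)}\n"
new = b"x = 1\nequivalent_hashes = {'verifier': ('aaaa', 'bbbb',)}\n"
assert verifier_for(old) == verifier_for(new)  # only the dict body changed
print(verifier_for(new))
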
Example #9
def execute_process(workdir,
                    jobid,
                    slices,
                    result_directory,
                    common_directory,
                    source_directory,
                    index=None,
                    workspaces=None,
                    daemon_url=None,
                    subjob_cookie=None,
                    parent_pid=0):
    g.JOBID = jobid
    setproctitle('launch')
    path = os.path.join(workdir, jobid)
    try:
        os.chdir(path)
    except Exception:
        print("Cannot cd to workdir", path)
        exit(1)

    g.params = params = job_params()
    method_ref = import_module(params.package + '.a_' + params.method)
    g.sliceno = -1

    if workspaces:
        jobid_module.put_workspaces(workspaces)

    def maybe_dataset(v):
        if isinstance(v, list):
            return [maybe_dataset(e) for e in v]
        if not v:
            return ''
        try:
            return dataset.Dataset(v)
        except IOError:
            return v

    datasets = DotDict(
        {k: maybe_dataset(v)
         for k, v in params.datasets.items()})

    g.options = params.options
    g.datasets = datasets
    g.jobids = params.jobids

    method_ref.options = params.options
    method_ref.datasets = datasets
    method_ref.jobids = params.jobids

    # compatibility names
    g.SLICES = slices
    g.JOBID = jobid
    g.jobid = jobid
    g.METHOD = params.method
    g.WORKSPACEPATH = workdir
    g.CAPTION = params.caption
    g.PACKAGE = params.package
    g.RESULT_DIRECTORY = result_directory
    g.COMMON_DIRECTORY = common_directory
    g.SOURCE_DIRECTORY = source_directory
    g.index = -1

    g.daemon_url = daemon_url
    g.running = 'launch'
    status._start('%s %s' % (
        jobid,
        params.method,
    ), parent_pid)

    def dummy():
        pass

    prepare_func = getattr(method_ref, 'prepare', dummy)
    analysis_func = getattr(method_ref, 'analysis', dummy)
    synthesis_func = getattr(method_ref, 'synthesis', dummy)

    synthesis_needs_analysis = 'analysis_res' in getargspec(
        synthesis_func).args

    # A chain must be finished from the back, so sort on that.
    sortnum_cache = {}

    def dw_sortnum(name):
        if name not in sortnum_cache:
            dw = dataset._datasetwriters[name]
            if dw.previous and dw.previous.startswith(jobid + '/'):
                pname = dw.previous.split('/')[1]
                num = dw_sortnum(pname) + 1
            else:
                num = 0
            sortnum_cache[name] = num
        return sortnum_cache[name]

    prof = {}
    if prepare_func is dummy:
        prof['prepare'] = 0  # truthish!
    else:
        t = time()
        g.running = 'prepare'
        g.subjob_cookie = subjob_cookie
        setproctitle(g.running)
        with status.status(g.running):
            g.prepare_res = method_ref.prepare(**args_for(method_ref.prepare))
            to_finish = [
                dw.name for dw in dataset._datasetwriters.values()
                if dw._started
            ]
            if to_finish:
                with status.status("Finishing datasets"):
                    for name in sorted(to_finish, key=dw_sortnum):
                        dataset._datasetwriters[name].finish()
        prof['prepare'] = time() - t
    setproctitle('launch')
    from extras import saved_files
    if analysis_func is dummy:
        prof['per_slice'] = []
        prof['analysis'] = 0
    else:
        t = time()
        g.running = 'analysis'
        g.subjob_cookie = None  # subjobs are not allowed from analysis
        with status.status('Waiting for all slices to finish analysis'):
            prof['per_slice'], files, g.analysis_res = fork_analysis(
                slices, analysis_func, args_for(analysis_func),
                synthesis_needs_analysis)
        prof['analysis'] = time() - t
        saved_files.update(files)
    t = time()
    g.running = 'synthesis'
    g.subjob_cookie = subjob_cookie
    setproctitle(g.running)
    with status.status(g.running):
        synthesis_res = synthesis_func(**args_for(synthesis_func))
        if synthesis_res is not None:
            blob.save(synthesis_res, temp=False)
        if dataset._datasetwriters:
            with status.status("Finishing datasets"):
                for name in sorted(dataset._datasetwriters, key=dw_sortnum):
                    dataset._datasetwriters[name].finish()
    t = time() - t
    prof['synthesis'] = t

    from subjobs import _record
    status._end()
    return None, (prof, saved_files, _record)
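
execute_process decides whether to hand synthesis() the analysis results by inspecting the function's argument names ('analysis_res' in getargspec(synthesis_func).args), and args_for() presumably builds each call's keyword arguments the same way. args_for is project-specific, so the helper below is only a hypothetical stand-in showing the introspection technique with the modern inspect.signature API:

import inspect

def args_for_sketch(func, available):
    # Hypothetical stand-in for args_for(): pass 'func' only those entries of
    # 'available' whose names match its declared parameters.
    wanted = inspect.signature(func).parameters
    return {name: value for name, value in available.items() if name in wanted}

def synthesis(params, prepare_res, analysis_res=None):
    return prepare_res

available = {'params': {'slices': 8}, 'prepare_res': 'dw', 'analysis_res': [], 'options': {}}
print(sorted(args_for_sketch(synthesis, available)))              # ['analysis_res', 'params', 'prepare_res']
print('analysis_res' in inspect.signature(synthesis).parameters)  # True
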