Example #1
0
def launch(workdir, setup, config, Methods, active_workdirs, slices, concurrency, debug, server_url, subjob_cookie, parent_pid):
	"""Fork one job via its method runner, wait for it, and return its result data.

	Builds the argument dict for the runner, starts the child with
	runner.launch_start, records the child in the module-level ``children``
	set, and waits for completion via runner.launch_finish.  On a non-empty
	``status`` the child's process group gets SIGTERM (and unconditionally
	SIGKILL in the finally block) and a JobError is raised after cleanup.

	NOTE(review): ``children``, ``monotonic``, ``statmsg_endwait``,
	``JobError`` and the signal constants come from module scope outside this
	view; ``debug`` is accepted but not used in this function body.
	"""
	starttime = monotonic()
	jobid = setup.jobid
	method = setup.method
	# Subjobs print flush-left; top-level jobs get an indent in the output.
	if subjob_cookie:
		print_prefix = ''
	else:
		print_prefix = '    '
	print('%s| %s [%s] |' % (print_prefix, jobid, method,))
	# Everything the child process needs to run the method.
	args = dict(
		workdir=workdir,
		slices=slices,
		concurrency=concurrency,
		jobid=jobid,
		result_directory=config.get('result_directory', ''),
		common_directory=config.get('common_directory', ''),
		input_directory=config.get('input_directory', ''),
		workdirs=active_workdirs,
		server_url=server_url,
		subjob_cookie=subjob_cookie,
		parent_pid=parent_pid,
		debuggable=config.debuggable,
	)
	from accelerator.runner import runners
	# Each method declares an interpreter version; pick the matching runner.
	runner = runners[Methods.db[method].version]
	child, prof_r = runner.launch_start(args)
	# There's a race where if we get interrupted right after fork this is not recorded
	# (the launched job could continue running)
	try:
		children.add(child)
		status, data = runner.launch_finish(child, prof_r, workdir, jobid, method)
		if status:
			os.killpg(child, SIGTERM) # give it a chance to exit gracefully
			# The dying process won't have sent an end message, so it has
			# the endwait time until we SIGKILL it.
			print('%s| %s [%s]  failed!    (%5.1fs) |' % (print_prefix, jobid, method, monotonic() - starttime))
		# There is a race where stuff on the status socket has not arrived when
		# the sending process exits. This is basically benign, but let's give
		# it a chance to arrive to cut down on confusing warnings.
		statmsg_endwait(child, 0.1)
	finally:
		# Best-effort cleanup: each step may legitimately fail (child already
		# gone, already removed from the set), so failures are swallowed.
		try:
			os.killpg(child, SIGKILL) # this should normally be a no-op, but in case it left anything.
		except Exception:
			pass
		try:
			children.remove(child)
		except Exception:
			pass
		try:
			# won't block long (we just killed it, plus it had probably already exited)
			runner.launch_waitpid(child)
		except Exception:
			pass
	if status:
		raise JobError(jobid, method, status)
	print('%s| %s [%s]  completed. (%5.1fs) |' % (print_prefix, jobid, method, monotonic() - starttime))
	return data
Example #2
0
def build(method, options=None, datasets=None, jobids=None, name=None, caption=None):
    """Just like urd.build, but for making subjobs.

    Builds ``method`` as a subjob of the currently running job through a
    lazily created module-level Automata (``_a``), shows a status message
    while building when a name/caption is given, records built/linked jobs
    in ``_record``, and returns the new jobid.

    Raises DaemonError/JobError (re-raised as fresh instances) on failure.
    Not allowed from analysis, and requires ``g.subjob_cookie``.
    """
    # Fix: the original used mutable default arguments ({}), which are shared
    # across all calls.  None-sentinels give a fresh dict per call while
    # keeping behaviour identical for callers that omit these arguments.
    options = {} if options is None else options
    datasets = {} if datasets is None else datasets
    jobids = {} if jobids is None else jobids

    global _a
    assert g.running != 'analysis', "Analysis is not allowed to make subjobs"
    assert g.subjob_cookie, "Can't build subjobs: out of cookies"
    if not _a:
        # First subjob from this job: set up the Automata used for all of them.
        _a = Automata(g.daemon_url, subjob_cookie=g.subjob_cookie)
        _a.update_method_deps()
        _a.record[None] = _a.jobs = jobs

    def run():
        return _a.call_method(method,
                              options=options,
                              datasets=datasets,
                              jobids=jobids,
                              record_as=name,
                              caption=caption)

    try:
        if name or caption:
            msg = 'Building subjob %s' % (name or method, )
            if caption:
                msg += ' "%s"' % (caption, )
            with status(msg):
                jid = run()
        else:
            jid = run()
    except DaemonError as e:
        # Re-raise a fresh instance so the traceback points at this call
        # site rather than the plumbing that received the error.
        raise DaemonError(e.args[0])
    except JobError as e:
        raise JobError(e.jobid, e.method, e.status)
    # Record which jobs this build touched; d.make is truthy when the job
    # was actually (re)built rather than just linked to an existing one.
    # NOTE(review): 'job_retur' is the attribute name used by the project's
    # Automata class, not a typo introduced here.
    for d in _a.job_retur.jobs.values():
        if d.link not in _record:
            _record[d.link] = bool(d.make)
    return jid
Example #3
0
 def run(jobidv, tlock):
     """Build each jobid in jobidv in order, then update the -LATEST symlink.

     For every jobid: registers a unique subjob cookie in ``job_tracking``
     (so subjobs it spawns can be tracked), resolves its concurrency from
     the setup / concurrency maps, runs it via ``self.ctrl.run_job`` and
     marks it DONE or FAIL in ``link2job`` under ``tlock``.  Stops at the
     first failure.  On full success, atomically repoints the workdir's
     "-LATEST" symlink at the last built jobid.

     NOTE(review): this is a closure — ``setup``, ``workdir``, ``data``,
     ``job_tracking``, ``link2job``, ``error``, ``self``, ``gen_cookie``,
     ``load_setup`` etc. are bound in the enclosing (not visible) scope.
     """
     for jobid in jobidv:
         passed_cookie = None
         # This is not a race - all higher locks are locked too.
         # Loop until we draw a cookie not already in use.
         while passed_cookie in job_tracking:
             passed_cookie = gen_cookie()
         # Per-job concurrency settings: global defaults overridden by the
         # job's own setup.
         concurrency_map = dict(
             data.concurrency_map)
         concurrency_map.update(
             setup.get('concurrency_map', ()))
         # Register this build so subjobs presenting passed_cookie are
         # accepted and tracked.
         job_tracking[passed_cookie] = DotDict(
             lock=JLock(),
             last_error=None,
             last_time=0,
             workdir=workdir,
             concurrency_map=concurrency_map,
         )
         try:
             # Explicit per-method concurrency wins over the '-default-' entry.
             explicit_concurrency = setup.get(
                 'concurrency'
             ) or concurrency_map.get(setup.method)
             concurrency = explicit_concurrency or concurrency_map.get(
                 '-default-')
             if concurrency and setup.method == 'csvimport':
                 # just to be safe, check the package too
                 if load_setup(
                         jobid
                 ).package == 'accelerator.standard_methods':
                     # ignore default concurrency, error on explicit.
                     if explicit_concurrency:
                         raise JobError(
                             jobid, 'csvimport', {
                                 'server':
                                 'csvimport can not run with reduced concurrency'
                             })
                     concurrency = None
             self.ctrl.run_job(
                 jobid,
                 subjob_cookie=passed_cookie,
                 parent_pid=setup.get(
                     'parent_pid', 0),
                 concurrency=concurrency)
             # update database since a new jobid was just created
             job = self.ctrl.add_single_jobid(jobid)
             with tlock:
                 link2job[jobid]['make'] = 'DONE'
                 link2job[jobid][
                     'total_time'] = job.total
         except JobError as e:
             # Record the failure and abort the remaining jobids.
             error.append(
                 [e.job, e.method, e.status])
             with tlock:
                 link2job[jobid]['make'] = 'FAIL'
             return
         finally:
             # Always unregister the cookie, success or failure.
             del job_tracking[passed_cookie]
     # everything was built ok, update symlink
     try:
         dn = self.ctrl.workspaces[workdir].path
         # Create the new link under a temp name, then rename over the real
         # one so "-LATEST" is replaced atomically.
         ln = os.path.join(dn, workdir + "-LATEST_")
         try:
             os.unlink(ln)
         except OSError:
             pass
         os.symlink(jobid, ln)
         os.rename(
             ln,
             os.path.join(dn, workdir + "-LATEST"))
     except OSError:
         traceback.print_exc(file=sys.stderr)