Exemplo n.º 1
0
 def call_method(self,
                 method,
                 options=None,
                 datasets=None,
                 jobs=None,
                 record_in=None,
                 record_as=None,
                 why_build=False,
                 caption=None,
                 workdir=None,
                 concurrency=None,
                 **kw):
     """Submit *method* for building and return the resulting Job.

     options/datasets/jobs are dicts of arguments for the method; any
     extra **kw is routed into whichever of the three groups declares an
     argument of that name.  Unknown or ambiguous keyword names raise.

     record_in/record_as control where and under which name the job is
     appended in self.record.  why_build=True returns the server's
     why_build analysis instead of a Job.

     Note: options/datasets/jobs defaults were mutable ({}); changed to
     None (with ``or {}``) to avoid the shared-mutable-default pitfall.
     Behaviour is unchanged - the originals were copied immediately.
     """
     if method not in self._method_info:
         raise Exception('Unknown method %s' % (method, ))
     info = self._method_info[method]
     # Copy the caller's dicts so the **kw routing below never mutates them.
     params = dict(options=dict(options or {}),
                   datasets=dict(datasets or {}),
                   jobs=dict(jobs or {}))
     # Map each declared argument name to the group(s) that declare it.
     argmap = defaultdict(list)
     for thing in ('options', 'datasets', 'jobs'):
         for n in info[thing]:
             argmap[n].append(thing)
     for k, v in kw.items():
         if k not in argmap:
             raise Exception(
                 'Keyword %s not in options/datasets/jobs for method %s' % (
                     k,
                     method,
                 ))
         if len(argmap[k]) != 1:
             raise Exception(
                 'Keyword %s has several targets on method %s: %r' % (
                     k,
                     method,
                     argmap[k],
                 ))
         params[argmap[k][0]][k] = v
     jid, res = self._submit(method,
                             caption=caption,
                             why_build=why_build,
                             workdir=workdir,
                             concurrency=concurrency,
                             **params)
     if why_build:  # specified by caller
         return res.why_build
     if 'why_build' in res:  # done by server anyway (because --flags why_build)
         print("Would have built from:")
         print("======================")
         print(setupfile.encode_setup(self.history[-1][0], as_str=True))
         print("Could have avoided build if:")
         print("============================")
         print(json_encode(res.why_build, as_str=True))
         print()
         from inspect import stack
         stk = stack()[2]
         print("Called from %s line %d" % (
             stk[1],
             stk[2],
         ))
         # Deliberate: in server-forced why_build mode the build script stops here.
         exit()
     jid = Job(jid, record_as or method)
     self.record[record_in].append(jid)
     return jid
Exemplo n.º 2
0
    def _handle_req(self, path, args):
        """Dispatch one server request.

        *path* is the URL path split into a list of segments, *args* the
        parsed request arguments.  Every branch ends by calling
        self.do_response(status, content_type, body).  The 'submit'
        branch additionally starts a worker thread that runs the
        requested jobs, answering early if they take longer than two
        seconds.

        The per-cookie ``data.lock`` doubles as a busy flag: holding it
        means "a build is in progress" for that (sub)job level.
        """
        if path[0] == 'status':
            # Poll build status: acquiring the lock (non-blocking) means idle.
            data = job_tracking.get(args.get('subjob_cookie') or None)
            if not data:
                self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
                return
            # Long-poll up to the requested timeout, capped at 128 seconds.
            timeout = min(float(args.get('timeout', 0)), 128)
            status = DotDict(idle=data.lock.acquire(False))
            deadline = time.time() + timeout
            while not status.idle and time.time() < deadline:
                time.sleep(0.1)
                status.idle = data.lock.acquire(False)
            if status.idle:
                if data.last_error:
                    status.last_error_time = data.last_error[0]
                status.last_time = data.last_time
                # We only took the lock to test idleness - give it back.
                data.lock.release()
            elif path == ['status', 'full']:
                # Still busy and the caller asked for details: include the
                # status stacks of whatever is currently running.
                status.status_stacks, status.current = status_stacks_export()
            status.report_t = time.time()
            self.do_response(200, "text/json", status)
            return

        elif path == ['last_error']:
            # Report the most recent build error for this (sub)job cookie.
            data = job_tracking.get(args.get('subjob_cookie') or None)
            if not data:
                self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
                return
            status = DotDict()
            if data.last_error:
                # last_error is (timestamp, [[jobid, method, status], ...]).
                status.time = data.last_error[0]
                status.last_error = data.last_error[1]
            self.do_response(200, "text/json", status)
            return

        elif path == ['list_workdirs']:
            # Map of workdir name -> filesystem path.
            ws = {k: v.path for k, v in self.ctrl.list_workdirs().items()}
            self.do_response(200, "text/json", ws)

        elif path[0] == 'workdir':
            # Database contents for one named workdir (path[1]).
            self.do_response(200, "text/json",
                             self.ctrl.DataBase.db_by_workdir[path[1]])

        elif path == ['config']:
            self.do_response(200, "text/json", self.ctrl.config)

        elif path == ['update_methods']:
            self.do_response(200, "text/json", self.ctrl.update_methods())

        elif path == ['methods']:
            """ return a json with everything the Method object knows about the methods """
            self.do_response(200, "text/json", self.ctrl.get_methods())

        elif path[0] == 'method_info':
            method = path[1]
            self.do_response(200, "text/json", self.ctrl.method_info(method))

        elif path[0] == 'workspace_info':
            self.do_response(200, 'text/json',
                             self.ctrl.get_workspace_details())

        elif path[0] == 'abort':
            # Hard-kill all child process groups (SIGKILL, no cleanup).
            tokill = list(children)
            print('Force abort', tokill)
            for child in tokill:
                os.killpg(child, signal.SIGKILL)
            self.do_response(200, 'text/json', {'killed': len(tokill)})

        elif path[0] == 'method2job':
            # Resolve (method, num) to a current jobid for that method,
            # optionally counting from the job given in 'start_from'.
            method, num = path[1:]
            jobs = self.ctrl.DataBase.db_by_method.get(method, ())
            start_ix = 0
            start_from = args.get('start_from')
            if start_from:
                for start_ix, job in enumerate(jobs):
                    if job.id == start_from:
                        break
                else:
                    # start_from was given but is not a current job.
                    start_ix = None
            if start_ix is None:
                res = {
                    'error': '%s is not a current %s job' % (
                        start_from,
                        method,
                    )
                }
            else:
                # NOTE(review): start_ix is folded into num here, yet the
                # comparison and indexing below add start_ix again - this
                # looks like a double offset; confirm intent before changing.
                num = int(num) + start_ix
                if not jobs:
                    res = {
                        'error':
                        'no current jobs with method %s available' % (method, )
                    }
                elif num + start_ix >= len(jobs):
                    res = {
                        'error':
                        'tried to go %d jobs back from %s, but only %d earlier (current) jobs available'
                        % (
                            num,
                            jobs[start_ix].id,
                            len(jobs) - start_ix,
                        )
                    }
                else:
                    res = {'id': jobs[num + start_ix].id}
            self.do_response(200, 'text/json', res)

        elif path[0] == 'job_is_current':
            # True iff the job exists in its workdir db and is marked current.
            job = Job(path[1])
            job = self.ctrl.DataBase.db_by_workdir[job.workdir].get(job)
            self.do_response(200, 'text/json', bool(job and job['current']))

        elif path == ['submit']:
            if self.ctrl.broken:
                # Refuse to build while any method failed to load.
                self.do_response(
                    500, "text/json", {
                        'broken':
                        self.ctrl.broken,
                        'error':
                        'Broken methods: ' + ', '.join(
                            sorted(
                                m.split('.')[-1][2:]
                                for m in self.ctrl.broken))
                    })
            elif 'json' in args:
                if DEBUG_WRITE_JSON:
                    with open('DEBUG_WRITE.json', 'wb') as fh:
                        fh.write(args['json'])
                setup = json_decode(args['json'])
                data = job_tracking.get(setup.get('subjob_cookie') or None)
                if not data:
                    self.do_response(403, 'text/plain', 'bad subjob_cookie!\n')
                    return
                if len(job_tracking) - 1 > 5:  # max five levels
                    print('Too deep subjob nesting!')
                    self.do_response(403, 'text/plain',
                                     'Too deep subjob nesting')
                    return
                # Take the build lock for this level; released in the
                # finally below (or earlier, on the error-response path).
                if data.lock.acquire(False):
                    still_locked = True
                    respond_after = True
                    try:
                        if self.DEBUG:
                            print('@server.py:  Got the lock!',
                                  file=sys.stderr)
                        workdir = setup.get('workdir', data.workdir)
                        jobidv, job_res = self.ctrl.initialise_jobs(
                            setup, workdir)
                        job_res['done'] = False
                        if jobidv:
                            error = []
                            tlock = TLock()
                            # link -> per-job result dict, shared with the
                            # worker thread (guarded by tlock).
                            link2job = {
                                j['link']: j
                                for j in job_res['jobs'].values()
                            }

                            # Worker: build each job in order, then update
                            # the workdir's -LATEST symlink on full success.
                            def run(jobidv, tlock):
                                for jobid in jobidv:
                                    passed_cookie = None
                                    # This is not a race - all higher locks are locked too.
                                    while passed_cookie in job_tracking:
                                        passed_cookie = gen_cookie()
                                    job_tracking[passed_cookie] = DotDict(
                                        lock=JLock(),
                                        last_error=None,
                                        last_time=0,
                                        workdir=workdir,
                                    )
                                    try:
                                        self.ctrl.run_job(
                                            jobid,
                                            subjob_cookie=passed_cookie,
                                            parent_pid=setup.get(
                                                'parent_pid', 0))
                                        # update database since a new jobid was just created
                                        job = self.ctrl.add_single_jobid(jobid)
                                        with tlock:
                                            link2job[jobid]['make'] = 'DONE'
                                            link2job[jobid][
                                                'total_time'] = job.total
                                    except JobError as e:
                                        error.append(
                                            [e.jobid, e.method, e.status])
                                        with tlock:
                                            link2job[jobid]['make'] = 'FAIL'
                                        # Stop at the first failing job.
                                        return
                                    finally:
                                        del job_tracking[passed_cookie]
                                # everything was built ok, update symlink
                                try:
                                    dn = self.ctrl.workspaces[workdir].path
                                    # Create as "-LATEST_" then rename over
                                    # "-LATEST" so the switch is atomic.
                                    ln = os.path.join(dn, workdir + "-LATEST_")
                                    try:
                                        os.unlink(ln)
                                    except OSError:
                                        pass
                                    os.symlink(jobid, ln)
                                    os.rename(
                                        ln,
                                        os.path.join(dn, workdir + "-LATEST"))
                                except OSError:
                                    # Best effort - log and carry on.
                                    traceback.print_exc()

                            t = Thread(target=run,
                                       name="job runner",
                                       args=(
                                           jobidv,
                                           tlock,
                                       ))
                            t.daemon = True
                            t.start()
                            t.join(2)  # give job two seconds to complete
                            with tlock:
                                # 'make' is True while still building, so any
                                # True/'FAIL' entry means respond early.
                                for j in link2job.values():
                                    if j['make'] in (
                                            True,
                                            'FAIL',
                                    ):
                                        respond_after = False
                                        job_res_json = json_encode(job_res)
                                        break
                            if not respond_after:  # not all jobs are done yet, give partial response
                                self.do_response(200, "text/json",
                                                 job_res_json)
                            t.join()  # wait until actually complete
                            del tlock
                            del t
                            # verify that all jobs got built.
                            total_time = 0
                            for j in link2job.values():
                                jobid = j['link']
                                if j['make'] == True:
                                    # Well, crap.
                                    error.append([
                                        jobid, "unknown", {
                                            "INTERNAL": "Not built"
                                        }
                                    ])
                                    print("INTERNAL ERROR IN JOB BUILDING!",
                                          file=sys.stderr)
                                total_time += j.get('total_time', 0)
                            if error:
                                data.last_error = (time.time(), error)
                            data.last_time = total_time
                    except Exception as e:
                        # If we have not answered yet, release the lock and
                        # send the error; re-raise either way for logging.
                        if respond_after:
                            data.lock.release()
                            still_locked = False
                            self.do_response(500, "text/json",
                                             {'error': str(e)})
                        raise
                    finally:
                        if still_locked:
                            data.lock.release()
                    if respond_after:
                        job_res['done'] = True
                        self.do_response(200, "text/json", job_res)
                    if self.DEBUG:
                        print("@server.py:  Process releases lock!",
                              file=sys.stderr
                              )  # note: has already done http response
                else:
                    # Lock already held: a build is in progress at this level.
                    self.do_response(503, 'text/plain',
                                     'Busy doing work for you...\n')
            else:
                self.do_response(400, 'text/plain', 'Missing json input!\n')
        else:
            self.do_response(404, 'text/plain', 'Unknown path\n')
            return
Exemplo n.º 3
0
def get_job(jobid):
	"""Resolve *jobid* to a Job, following a workdir '-LATEST' symlink if given."""
	if not jobid.endswith('-LATEST'):
		return Job(jobid)
	workdir = jobid.rsplit('-', 1)[0]
	# Job(workdir + '-0').path ends in '-0'; dropping those two characters
	# yields the workdir prefix, from which we read the -LATEST symlink.
	link = Job(workdir + '-0').path[:-2] + '-LATEST'
	return Job(os.readlink(link))
Exemplo n.º 4
0
def _name2job(cfg, n):
    """Translate the job specifier *n* into a Job object.

    Accepted forms: ':urdlist:[entry]' (resolved through urd), a plain
    jobid, 'workdir-LATEST', a bare method name, or a path to a jobdir.
    Raises JobNotFound (or NoSuchWorkdirError) when resolution fails.
    """
    if n.startswith(':'):
        # ':urdlist:[entry]' - resolve through urd.
        assert cfg.urd, 'No urd configured'
        parts = n[1:].rsplit(':', 1)
        if len(parts) == 1:
            raise JobNotFound('looks like a partial :urdlist:[entry] spec')
        entry = parts[1] or '-1'
        try:
            entry = int(entry, 10)
        except ValueError:
            pass  # non-numeric entries are looked up by name
        lst, tildes = split_tildes(parts[0])
        elements = lst.split('/')
        # Pad a short list path: default user first, then 'latest'.
        if len(elements) < 3:
            elements.insert(0, environ.get('USER', 'NO-USER'))
        if len(elements) < 3:
            elements.append('latest')
        urdpath = '/'.join(map(url_quote, elements))
        try:
            urdres = urd_call_w_tildes(cfg, urdpath, tildes)
        except UrdError as e:
            print(e, file=sys.stderr)
            urdres = None
        if not urdres:
            raise JobNotFound('urd list %r not found' % (parts[0], ))
        from accelerator.build import JobList
        joblist = JobList(Job(item[1], item[0]) for item in urdres.joblist)
        found = joblist.get(entry)
        if not found:
            raise JobNotFound('%r not found in %s' % (
                entry,
                urdpath,
            ))
        return found
    if re.match(r'[^/]+-\d+$', n):
        # Looks like a jobid
        return Job(n)
    match = re.match(r'([^/]+)-LATEST$', n)
    if match:
        # Looks like workdir-LATEST
        workdir_name = match.group(1)
        if workdir_name not in WORKDIRS:
            raise NoSuchWorkdirError('Not a valid workdir: "%s"' % (workdir_name, ))
        linkpath = join(WORKDIRS[workdir_name], n)
        try:
            target = readlink(linkpath)
        except OSError as e:
            raise JobNotFound('Failed to read %s: %s' % (
                linkpath,
                e,
            ))
        return Job(target)
    if '/' not in n:
        # Must be a method then
        return method2job(cfg, n)
    if exists(join(n, 'setup.json')):
        # Looks like the path to a jobdir
        dirname, jid = split(realpath(n))
        job = Job(jid)
        if WORKDIRS.get(job.workdir, dirname) != dirname:
            print("### Overriding workdir %s to %s" % (
                job.workdir,
                dirname,
            ))
        WORKDIRS[job.workdir] = dirname
        return job
    raise JobNotFound("Don't know what to do with %r." % (n, ))