def test(name, input, want_obj, want_bytes, **kw):
	"""Round-trip *input* through json_save / json_encode / json_load and
	check that the bytes on disk, both encoded forms and the decoded
	object all agree with *want_obj* / *want_bytes*.
	"""
	json_save(input, name, **kw)
	with open(name, "rb") as fh:
		raw = fh.read()
	assert raw[-1:] == b"\n", name + " didn't even end with a newline"
	raw = raw[:-1]
	encoded_str = json_encode(input, as_str=True, **kw)
	encoded_bytes = json_encode(input, as_str=False, **kw)
	# Both encodings must come back as the advertised types.
	ok_types = isinstance(encoded_str, str) and isinstance(encoded_bytes, bytes)
	assert ok_types, "json_encode returns the wrong types: %s %s" % (type(encoded_str), type(encoded_bytes),)
	assert encoded_bytes == raw, "json_save doesn't save the same thing json_encode returns for " + name
	if PY3:
		encoded_str = encoded_str.encode("utf-8")
	assert encoded_bytes == encoded_str, "json_encode doesn't return the same data for as_str=True and False"
	decoded = json_load(name)
	assert want_obj == decoded, "%s roundtrips wrong (wanted %r, got %r)" % (name, want_obj, decoded)
	# Compare ignoring indentation, line by line.
	with open(name, "rb") as fh:
		fuzzy = b"".join(line.strip() for line in fh)
	assert want_bytes == fuzzy, "%s wrong on disk (but decoded right)" % (name,)
def call_method(self, method, options=None, datasets=None, jobs=None, record_in=None, record_as=None, why_build=False, caption=None, workdir=None, **kw):
	"""Submit method for building and return the resulting Job.

	options/datasets/jobs are passed through to the method.  Extra
	keyword arguments are routed to whichever one of those three the
	method declares them in; unknown or ambiguous names raise.
	With why_build=True nothing is built and the server's why_build
	analysis is returned instead.  The job is recorded in
	self.record[record_in] under record_as (defaults to the method name).
	"""
	if method not in self._method_info:
		raise Exception('Unknown method %s' % (method,))
	info = self._method_info[method]
	# None (not {}) as defaults: mutable default arguments are shared
	# between calls.  The dict() copies also keep callers' dicts safe.
	params = dict(
		options=dict(options if options is not None else {}),
		datasets=dict(datasets if datasets is not None else {}),
		jobs=dict(jobs if jobs is not None else {}),
	)
	# Map each declared name to the param group(s) it appears in.
	argmap = defaultdict(list)
	for thing in ('options', 'datasets', 'jobs'):
		for n in info[thing]:
			argmap[n].append(thing)
	for k, v in kw.items():
		if k not in argmap:
			raise Exception('Keyword %s not in options/datasets/jobs for method %s' % (k, method,))
		if len(argmap[k]) != 1:
			raise Exception('Keyword %s has several targets on method %s: %r' % (k, method, argmap[k],))
		params[argmap[k][0]][k] = v
	jid, res = self._submit(method, caption=caption, why_build=why_build, workdir=workdir, **params)
	if why_build: # specified by caller
		return res.why_build
	if 'why_build' in res: # done by server anyway (because --flags why_build)
		print("Would have built from:")
		print("======================")
		print(setupfile.encode_setup(self.history[-1][0], as_str=True))
		print("Could have avoided build if:")
		print("============================")
		print(json_encode(res.why_build, as_str=True))
		print()
		from inspect import stack
		stk = stack()[2]
		print("Called from %s line %d" % (stk[1], stk[2],))
		exit()
	jid = Job(jid, record_as or method)
	self.record[record_in].append(jid)
	return jid
def _call(self, url, data=None, fmt=_urd_typeify):
	"""Perform one HTTP call against the configured urd server.

	data, when given, is json-encoded and POSTed; otherwise a plain GET
	is made.  The response body is passed through fmt and returned.
	401/409 raise UrdPermissionError/UrdConflictError immediately;
	other HTTP errors and bad payloads are retried (at most three
	attempts in total) before raising UrdError.  Connection failures
	raise UrdError without retrying.
	"""
	assert self._url, "No urd configured for this daemon"
	url = url.replace(' ', '%20')
	if data is not None:
		req = Request(url, json_encode(data), self._headers)
	else:
		req = Request(url)
	tries_left = 3
	while True:
		try:
			r = urlopen(req)
			try:
				code = r.getcode()
				if code == 401:
					raise UrdPermissionError()
				if code == 409:
					raise UrdConflictError()
				d = r.read()
				if PY3:
					d = d.decode('utf-8')
				return fmt(d)
			finally:
				# Always close the response, but never let close()
				# failures shadow the real result/exception.
				try:
					r.close()
				except Exception:
					pass
		except HTTPError as e:
			# It seems inconsistent if we get HTTPError or not for 4xx codes.
			if e.code == 401:
				raise UrdPermissionError()
			if e.code == 409:
				raise UrdConflictError()
			tries_left -= 1
			if not tries_left:
				raise UrdError('Error %d from urd' % (e.code,))
			print('Error %d from urd, %d tries left' % (e.code, tries_left,), file=sys.stderr)
		except ValueError as e:
			# fmt() failed - the server sent something we can't parse.
			tries_left -= 1
			msg = 'Bad data from urd, %s: %s' % (type(e).__name__, e,)
			if not tries_left:
				raise UrdError(msg)
			print('%s, %d tries left' % (msg, tries_left,), file=sys.stderr)
		except URLError:
			# Could not reach the server at all - no point retrying.
			print('Error contacting urd', file=sys.stderr)
			raise UrdError('Error contacting urd')
		# Pause before the next retry.
		time.sleep(4)
def _encode_with_compact(data, compact_keys, extra_indent=0, separator='\n', special_keys=()):
	"""json-encode data, rendering the keys in compact_keys on one line
	each (placed first) and the keys in special_keys pretty-printed at
	the end; everything else is encoded normally in between.

	NOTE(review): data is mutated - the compact/special keys are
	deleted from it before the remainder is json_encode()d.
	"""
	compact = []
	special = []
	for k in compact_keys:
		if k in data:
			if k == 'exectime':
				# exectime gets rounded floats and its own (nested)
				# compact treatment for the per-slice timings.
				d = _round_floats(data[k], 3)
				fmted = _encode_with_compact(d, ('analysis', 'per_slice',), 1, '')
			else:
				fmted = dumps(data[k])
			compact.append(' "%s": %s,' % (k, fmted,))
			del data[k]
	for k in special_keys:
		if k in data:
			fmted = dumps(data[k], indent=4, sort_keys=True)
			special.append(' "%s": %s' % (k, fmted.replace('\n', '\n '),))
			del data[k]
	res = json_encode(data, as_str=True)
	if compact:
		# res[1:] drops the opening '{' of the plain encoding so the
		# compact lines can be spliced in before the remaining keys.
		res = '{\n%s%s%s' % ('\n'.join(compact), separator, res[1:],)
	if special:
		# res[:-2] drops the closing of the plain encoding (presumably
		# '\n}' - TODO confirm against json_encode's output format).
		res = '%s,\n\n%s\n}' % (res[:-2], ',\n'.join(special),)
	# Re-indent the whole result for nested use.
	res = res.replace('\n', ('\n' + ' ' * extra_indent * 4))
	return res
def call_method(self, method, options=None, datasets=None, jobids=None, record_in=None, record_as=None, why_build=False, caption=None, workdir=None):
	"""Submit method for building and return the resulting Job.

	With why_build=True nothing is built and the server's why_build
	analysis is returned instead.  The job is recorded in
	self.record[record_in] under record_as (defaults to the method name).
	"""
	# None (not {}) as defaults: the old mutable defaults were passed
	# straight to _submit, so any mutation there would have leaked into
	# later calls.  Caller-supplied dicts are still passed by identity.
	if options is None:
		options = {}
	if datasets is None:
		datasets = {}
	if jobids is None:
		jobids = {}
	jid, res = self._submit(method, options, datasets, jobids, caption, why_build=why_build, workdir=workdir)
	if why_build: # specified by caller
		return res.why_build
	if 'why_build' in res: # done by server anyway (because --flags why_build)
		print("Would have built from:")
		print("======================")
		print(setupfile.encode_setup(self.history[-1][0], as_str=True))
		print("Could have avoided build if:")
		print("============================")
		print(json_encode(res.why_build, as_str=True))
		print()
		from inspect import stack
		stk = stack()[2]
		print("Called from %s line %d" % (stk[1], stk[2],))
		exit()
	jid = Job(jid, record_as or method)
	self.record[record_in].append(jid)
	return jid
def __init__(self, methods, setup):
	"""Build the dependency tree for setup.method and distribute the
	per-method params from setup down to every node, including the
	params of everything each method (transitively) depends on.
	"""
	tree = methods.new_deptree(setup.method)
	self.methods = methods
	self.top_method = setup.method
	self.tree = tree
	self.add_flags({
		'make': False,
		'link': False,
	})
	# Attach each method's own params, remembering which methods the
	# tree actually contains.
	seen = set()
	for method, data in iteritems(self.tree):
		seen.add(method)
		data['params'] = {method: setup.params[method]}
	# Warn about params for methods that are not part of the tree.
	unmatched = {method: params for method, params in iteritems(setup.params) if method not in seen}
	if unmatched:
		from accelerator.extras import json_encode
		print("DepTree Warning: Unmatched options remain:", json_encode(unmatched, as_str=True))
	def collect(method):
		# All methods that method depend on
		for child in tree[method]['dep']:
			yield child
			for method in collect(child):
				yield method
	# This probably updates some with the same data several times,
	# but this is cheap (key: dictref updates, nothing more.)
	for method, data in iteritems(self.tree):
		for submethod in set(collect(method)):
			data['params'].update(tree[submethod]['params'])
	self._fix_options(False)
	self._fix_jobids('jobids')
	self._fix_jobids('datasets')
def call(url, data=None, fmt=json_decode, headers={}, server_name='server', retries=4, quiet=False):
	"""Call url and return fmt(response body).

	data, when given and not already bytes, is json-encoded and POSTed.
	Talking to the server also verifies the Accelerator-Version header
	and exits the process on mismatch.  Transient failures are retried
	with a growing delay; urd 401/409 raise UrdPermissionError/
	UrdConflictError, and whatever still fails after the retries raises
	UrdError or ServerError depending on server_name.

	NOTE(review): headers={} is a mutable default shared between calls;
	harmless as long as nothing mutates it, but headers=None would be
	safer - confirm no caller relies on the shared object.
	"""
	if data is not None and not isinstance(data, bytes):
		data = json_encode(data)
	err = None
	req = Request(url, data=data, headers=headers)
	for attempt in range(1, retries + 2):
		resp = None
		try:
			r = urlopen(req)
			try:
				resp = r.read()
				if server_name == 'server' and g.running in ('build', 'shell',):
					# Refuse to talk to a server of a different version.
					s_version = r.headers['Accelerator-Version'] or '<unknown (old)>'
					if s_version != ax_version:
						# Nothing is supposed to catch this, so just print and die.
						print('Server is running version %s but we are running version %s' % (s_version, ax_version,), file=sys.stderr)
						exit(1)
				if PY3:
					resp = resp.decode('utf-8')
				# It is inconsistent if we get HTTPError or not.
				# It seems we do when using TCP sockets, but not when using unix sockets.
				if r.getcode() >= 400:
					raise HTTPError(url, r.getcode(), resp, {}, None)
				return fmt(resp)
			finally:
				try:
					r.close()
				except Exception:
					pass
		except HTTPError as e:
			# resp is None when urlopen itself raised; recover the body
			# from the exception object if possible.
			if resp is None and e.fp:
				resp = e.fp.read()
				if PY3:
					resp = resp.decode('utf-8')
			msg = '%s says %d: %s' % (server_name, e.code, resp,)
			if server_name == 'urd' and 400 <= e.code < 500:
				if e.code == 401:
					err = UrdPermissionError()
				if e.code == 409:
					err = UrdConflictError()
				# 4xx from urd is final - no retrying.
				break
			if server_name == 'server' and e.code != 503 and resp:
				# The server sends real (decodable) errors with 4xx/5xx.
				return fmt(resp)
		except URLError:
			# Don't say anything the first times, because the output
			# tests get messed up if this happens during them.
			if attempt < retries - 1:
				msg = None
			else:
				msg = 'error contacting ' + server_name
		except ValueError as e:
			# fmt() failed to parse the response.
			msg = 'Bad data from %s, %s: %s' % (server_name, type(e).__name__, e,)
		if msg and not quiet:
			print(msg, file=sys.stderr)
		if attempt < retries + 1:
			# Back off a little more for each attempt.
			# NOTE(review): attempt / 15 is integer division (0) on
			# python 2 - presumably intentional/harmless, but confirm.
			time.sleep(attempt / 15)
			if msg and not quiet:
				print('Retrying (%d/%d).' % (attempt, retries,), file=sys.stderr)
		else:
			if not quiet:
				print('Giving up.', file=sys.stderr)
	if err:
		raise err
	if server_name == 'urd':
		raise UrdError(msg)
	else:
		raise ServerError(msg)
def encode_body(self, body):
	"""Serialize a response body for sending: bytes pass through
	untouched, text is utf-8 encoded, anything else is json-encoded.
	"""
	if isinstance(body, bytes):
		return body
	elif isinstance(body, unicode):
		encoded = body.encode('utf-8')
		return encoded
	else:
		return json_encode(body)
def _handle_req(self, path, args):
	"""Dispatch one parsed http request.

	path is the request path split on '/', args the decoded request
	parameters.  Every branch answers via self.do_response() exactly
	once.  The 'submit' branch starts jobs on a worker thread and
	responds early if they take more than two seconds.
	"""
	if path[0] == 'status':
		data = job_tracking.get(args.get('subjob_cookie') or None)
		if not data:
			self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
			return
		# Poll for the tracking lock until it is free or timeout passes.
		timeout = min(float(args.get('timeout', 0)), 128)
		status = DotDict(idle=data.lock.acquire(False))
		deadline = monotonic() + timeout
		while not status.idle and monotonic() < deadline:
			time.sleep(0.1)
			status.idle = data.lock.acquire(False)
		if status.idle:
			if data.last_error:
				status.last_error_time = data.last_error[0]
			status.last_time = data.last_time
			data.lock.release()
		elif path == ['status', 'full']:
			status.status_stacks, status.current = status_stacks_export()
		status.report_t = monotonic()
		self.do_response(200, "text/json", status)
		return
	elif path == ['last_error']:
		data = job_tracking.get(args.get('subjob_cookie') or None)
		if not data:
			self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
			return
		status = DotDict()
		if data.last_error:
			status.time = data.last_error[0]
			status.last_error = data.last_error[1]
		self.do_response(200, "text/json", status)
		return
	elif path == ['list_workdirs']:
		ws = {k: v.path for k, v in self.ctrl.list_workdirs().items()}
		self.do_response(200, "text/json", ws)
	elif path[0] == 'workdir':
		self.do_response(200, "text/json", self.ctrl.DataBase.db_by_workdir[path[1]])
	elif path == ['config']:
		self.do_response(200, "text/json", self.ctrl.config)
	elif path == ['update_methods']:
		self.do_response(200, "text/json", self.ctrl.update_methods())
	elif path == ['methods']:
		""" return a json with everything the Method object knows about the methods """
		self.do_response(200, "text/json", self.ctrl.get_methods())
	elif path[0] == 'method_info':
		method = path[1]
		self.do_response(200, "text/json", self.ctrl.method_info(method))
	elif path[0] == 'workspace_info':
		self.do_response(200, 'text/json', self.ctrl.get_workspace_details())
	elif path[0] == 'abort':
		# Kill every tracked child process group.
		tokill = list(children)
		print('Force abort', tokill)
		for child in tokill:
			os.killpg(child, signal.SIGKILL)
		self.do_response(200, 'text/json', {'killed': len(tokill)})
	elif path[0] == 'method2job':
		# Look up the num:th (counting backwards, optionally from
		# start_from) current job built from method.
		method, num = path[1:]
		jobs = self.ctrl.DataBase.db_by_method.get(method, ())
		start_ix = 0
		start_from = args.get('start_from')
		if start_from:
			for start_ix, job in enumerate(jobs):
				if job.id == start_from:
					break
			else:
				start_ix = None
		if start_ix is None:
			res = {'error': '%s is not a current %s job' % (start_from, method,)}
		else:
			num = int(num)
			if not jobs:
				res = {'error': 'no current jobs with method %s available' % (method,)}
			elif num + start_ix >= len(jobs):
				res = {'error': 'tried to go %d jobs back from %s, but only %d earlier (current) jobs available' % (num, jobs[start_ix].id, len(jobs) - start_ix - 1,)}
			else:
				res = {'id': jobs[num + start_ix].id}
		self.do_response(200, 'text/json', res)
	elif path[0] == 'job_is_current':
		job = Job(path[1])
		job = self.ctrl.DataBase.db_by_workdir[job.workdir].get(job)
		self.do_response(200, 'text/json', bool(job and job['current']))
	elif path == ['submit']:
		if self.ctrl.broken:
			self.do_response(500, "text/json", {'broken': self.ctrl.broken, 'error': 'Broken methods: ' + ', '.join(sorted(m.split('.')[-1][2:] for m in self.ctrl.broken))})
		elif 'json' in args:
			if DEBUG_WRITE_JSON:
				with open('DEBUG_WRITE.json', 'wb') as fh:
					fh.write(args['json'])
			setup = json_decode(args['json'])
			data = job_tracking.get(setup.get('subjob_cookie') or None)
			if not data:
				self.do_response(403, 'text/plain', 'bad subjob_cookie!\n')
				return
			if len(job_tracking) - 1 > 5: # max five levels
				print('Too deep subjob nesting!')
				self.do_response(403, 'text/plain', 'Too deep subjob nesting')
				return
			if data.lock.acquire(False):
				still_locked = True
				respond_after = True
				try:
					if self.DEBUG:
						print('@server.py: Got the lock!', file=sys.stderr)
					workdir = setup.get('workdir', data.workdir)
					jobidv, job_res = self.ctrl.initialise_jobs(setup, workdir)
					job_res['done'] = False
					if jobidv:
						error = []
						tlock = TLock()
						link2job = {j['link']: j for j in job_res['jobs'].values()}
						def run(jobidv, tlock):
							# Worker thread: build each job in order,
							# tracking progress in link2job under tlock.
							for jobid in jobidv:
								passed_cookie = None
								# This is not a race - all higher locks are locked too.
								while passed_cookie in job_tracking:
									passed_cookie = gen_cookie()
								concurrency_map = dict(data.concurrency_map)
								concurrency_map.update(setup.get('concurrency_map', ()))
								job_tracking[passed_cookie] = DotDict(
									lock=JLock(),
									last_error=None,
									last_time=0,
									workdir=workdir,
									concurrency_map=concurrency_map,
								)
								try:
									explicit_concurrency = setup.get('concurrency') or concurrency_map.get(setup.method)
									concurrency = explicit_concurrency or concurrency_map.get('-default-')
									if concurrency and setup.method == 'csvimport':
										# just to be safe, check the package too
										if load_setup(jobid).package == 'accelerator.standard_methods':
											# ignore default concurrency, error on explicit.
											if explicit_concurrency:
												raise JobError(jobid, 'csvimport', {'server': 'csvimport can not run with reduced concurrency'})
											concurrency = None
									self.ctrl.run_job(jobid, subjob_cookie=passed_cookie, parent_pid=setup.get('parent_pid', 0), concurrency=concurrency)
									# update database since a new jobid was just created
									job = self.ctrl.add_single_jobid(jobid)
									with tlock:
										link2job[jobid]['make'] = 'DONE'
										link2job[jobid]['total_time'] = job.total
								except JobError as e:
									error.append([e.job, e.method, e.status])
									with tlock:
										link2job[jobid]['make'] = 'FAIL'
									return
								finally:
									del job_tracking[passed_cookie]
							# everything was built ok, update symlink
							try:
								dn = self.ctrl.workspaces[workdir].path
								ln = os.path.join(dn, workdir + "-LATEST_")
								try:
									os.unlink(ln)
								except OSError:
									pass
								os.symlink(jobid, ln)
								os.rename(ln, os.path.join(dn, workdir + "-LATEST"))
							except OSError:
								traceback.print_exc(file=sys.stderr)
						t = Thread(target=run, name="job runner", args=(jobidv, tlock,))
						t.daemon = True
						t.start()
						t.join(2) # give job two seconds to complete
						with tlock:
							for j in link2job.values():
								if j['make'] in (True, 'FAIL',):
									respond_after = False
									job_res_json = json_encode(job_res)
									break
						if not respond_after: # not all jobs are done yet, give partial response
							self.do_response(200, "text/json", job_res_json)
						t.join() # wait until actually complete
						del tlock
						del t
						# verify that all jobs got built.
						total_time = 0
						for j in link2job.values():
							jobid = j['link']
							if j['make'] == True:
								# Well, crap.
								error.append([jobid, "unknown", {"INTERNAL": "Not built"}])
								print("INTERNAL ERROR IN JOB BUILDING!", file=sys.stderr)
							total_time += j.get('total_time', 0)
						if error:
							data.last_error = (time.time(), error)
						data.last_time = total_time
				except Exception as e:
					# Only answer here if we have not already responded.
					if respond_after:
						data.lock.release()
						still_locked = False
						self.do_response(500, "text/json", {'error': str(e)})
					raise
				finally:
					if still_locked:
						data.lock.release()
				if respond_after:
					job_res['done'] = True
					self.do_response(200, "text/json", job_res)
				if self.DEBUG:
					print("@server.py: Process releases lock!", file=sys.stderr)
					# note: has already done http response
			else:
				self.do_response(503, 'text/plain', 'Busy doing work for you...\n')
		else:
			self.do_response(400, 'text/plain', 'Missing json input!\n')
	else:
		self.do_response(404, 'text/plain', 'Unknown path\n')
	return
def _handle_req(self, path, args):
	"""Dispatch one parsed http request (older daemon.py variant).

	path is the request path split on '/', args the decoded request
	parameters.  Every branch answers via self.do_response() exactly
	once.  The 'submit' branch starts jobs on a worker thread and
	responds early if they take more than two seconds.
	"""
	if path[0] == 'status':
		data = job_tracking.get(args.get('subjob_cookie') or None)
		if not data:
			self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
			return
		# Poll for the tracking lock until it is free or timeout passes.
		timeout = min(float(args.get('timeout', 0)), 128)
		status = DotDict(idle=data.lock.acquire(False))
		deadline = time.time() + timeout
		while not status.idle and time.time() < deadline:
			time.sleep(0.1)
			status.idle = data.lock.acquire(False)
		if status.idle:
			if data.last_error:
				# Errors are reported once, then cleared.
				status.last_error = data.last_error
				data.last_error = None
			else:
				status.last_time = data.last_time
			data.lock.release()
		elif path == ['status', 'full']:
			status.status_stacks, status.current = status_stacks_export()
		self.do_response(200, "text/json", status)
		return
	elif path == ['list_workdirs']:
		ws = {k: v.path for k, v in self.ctrl.list_workdirs().items()}
		self.do_response(200, "text/json", ws)
	elif path == ['config']:
		self.do_response(200, "text/json", self.ctrl.config)
	elif path == ['update_methods']:
		self.do_response(200, "text/json", self.ctrl.update_methods())
	elif path == ['methods']:
		""" return a json with everything the Method object knows about the methods """
		self.do_response(200, "text/json", self.ctrl.get_methods())
	elif path[0] == 'method_info':
		method = path[1]
		self.do_response(200, "text/json", self.ctrl.method_info(method))
	elif path[0] == 'workspace_info':
		self.do_response(200, 'text/json', self.ctrl.get_workspace_details())
	elif path[0] == 'abort':
		# Kill every tracked child process group.
		tokill = list(children)
		print('Force abort', tokill)
		for child in tokill:
			os.killpg(child, signal.SIGKILL)
		self.do_response(200, 'text/json', {'killed': len(tokill)})
	elif path == ['submit']:
		if self.ctrl.broken:
			self.do_response(500, "text/json", {'broken': self.ctrl.broken, 'error': 'Broken methods: ' + ', '.join(sorted(m.split('.')[-1][2:] for m in self.ctrl.broken))})
		elif 'json' in args:
			if DEBUG_WRITE_JSON:
				with open('DEBUG_WRITE.json', 'wb') as fh:
					fh.write(args['json'])
			setup = json_decode(args['json'])
			data = job_tracking.get(setup.get('subjob_cookie') or None)
			if not data:
				self.do_response(403, 'text/plain', 'bad subjob_cookie!\n')
				return
			if len(job_tracking) - 1 > 5: # max five levels
				print('Too deep subjob nesting!')
				self.do_response(403, 'text/plain', 'Too deep subjob nesting')
				return
			if data.lock.acquire(False):
				still_locked = True
				respond_after = True
				try:
					if self.DEBUG:
						print('@daemon.py: Got the lock!', file=sys.stderr)
					workdir = setup.get('workdir', data.workdir)
					jobidv, job_res = self.ctrl.initialise_jobs(setup, workdir)
					job_res['done'] = False
					if jobidv:
						error = []
						tlock = TLock()
						link2job = {j['link']: j for j in job_res['jobs'].values()}
						def run(jobidv, tlock):
							# Worker thread: build each job in order,
							# tracking progress in link2job under tlock.
							for jobid in jobidv:
								passed_cookie = None
								# This is not a race - all higher locks are locked too.
								while passed_cookie in job_tracking:
									passed_cookie = gen_cookie()
								job_tracking[passed_cookie] = DotDict(
									lock=JLock(),
									last_error=None,
									last_time=0,
									workdir=workdir,
								)
								try:
									self.ctrl.run_job(jobid, subjob_cookie=passed_cookie, parent_pid=setup.get('parent_pid', 0))
									# update database since a new jobid was just created
									job = self.ctrl.add_single_jobid(jobid)
									with tlock:
										link2job[jobid]['make'] = 'DONE'
										link2job[jobid]['total_time'] = job.total
								except JobError as e:
									error.append([e.jobid, e.method, e.status])
									with tlock:
										link2job[jobid]['make'] = 'FAIL'
									return
								finally:
									del job_tracking[passed_cookie]
							# everything was built ok, update symlink
							try:
								dn = self.ctrl.workspaces[workdir].path
								ln = os.path.join(dn, workdir + "-LATEST_")
								try:
									os.unlink(ln)
								except OSError:
									pass
								os.symlink(jobid, ln)
								os.rename(ln, os.path.join(dn, workdir + "-LATEST"))
							except OSError:
								traceback.print_exc()
						t = Thread(target=run, name="job runner", args=(jobidv, tlock,))
						t.daemon = True
						t.start()
						t.join(2) # give job two seconds to complete
						with tlock:
							for j in link2job.values():
								if j['make'] in (True, 'FAIL',):
									respond_after = False
									job_res_json = json_encode(job_res)
									break
						if not respond_after: # not all jobs are done yet, give partial response
							self.do_response(200, "text/json", job_res_json)
						t.join() # wait until actually complete
						del tlock
						del t
						# verify that all jobs got built.
						total_time = 0
						for j in link2job.values():
							jobid = j['link']
							if j['make'] == True:
								# Well, crap.
								error.append([jobid, "unknown", {"INTERNAL": "Not built"}])
								print("INTERNAL ERROR IN JOB BUILDING!", file=sys.stderr)
							total_time += j.get('total_time', 0)
						data.last_error = error
						data.last_time = total_time
				except Exception as e:
					# Only answer here if we have not already responded.
					if respond_after:
						data.lock.release()
						still_locked = False
						self.do_response(500, "text/json", {'error': str(e)})
					raise
				finally:
					if still_locked:
						data.lock.release()
				if respond_after:
					job_res['done'] = True
					self.do_response(200, "text/json", job_res)
				if self.DEBUG:
					print("@daemon.py: Process releases lock!", file=sys.stderr)
					# note: has already done http response
			else:
				self.do_response(503, 'text/plain', 'Busy doing work for you...\n')
		else:
			self.do_response(400, 'text/plain', 'Missing json input!\n')
	else:
		self.do_response(404, 'text/plain', 'Unknown path\n')
	return