def _request(**kwargs):
    """POST a JSON request to the job supervisor and return the decoded reply.

    The request body is tagged with the name of the calling ``api_*``
    function and the server secret before it is sent.

    Args:
        kwargs: request content. Keys with special meaning:
            ``api_name`` names the calling API explicitly (otherwise it is
            found by walking up the call stack), ``_request_uri`` overrides
            the destination URI, and ``_request_content`` supplies the body
            to send instead of the one built by ``_request_content()``.

    Returns:
        object: response body decoded by ``pkjson.load_any``

    Raises:
        AssertionError: no ``api_*`` caller found on the stack
        requests.HTTPError: raised by ``raise_for_status`` on an error reply
    """
    def get_api_name():
        # An explicit name wins over stack inspection.
        if 'api_name' in kwargs:
            return kwargs['api_name']
        f = inspect.currentframe()
        for _ in range(_MAX_FRAME_SEARCH_DEPTH):
            if f is None:
                # Ran off the outermost frame (or the interpreter has no
                # frame support) before the depth limit was reached.
                raise AssertionError(
                    'no api_* function found in call stack',
                )
            m = re.search(r'^api_.*$', f.f_code.co_name)
            if m:
                return m.group()
            f = f.f_back
        raise AssertionError(
            '{}: max frame search depth reached'.format(
                f.f_code if f is not None else None,
            )
        )

    k = PKDict(kwargs)
    u = k.pkdel('_request_uri') or cfg.supervisor_uri + sirepo.job.SERVER_URI
    c = k.pkdel('_request_content') if '_request_content' in k else _request_content(k)
    c.pkupdate(
        api=get_api_name(),
        serverSecret=sirepo.job.cfg.server_secret,
    )
    pkdlog('api={} runDir={}', c.api, c.get('runDir'))
    r = requests.post(
        u,
        data=pkjson.dump_bytes(c),
        headers=PKDict({'Content-type': 'application/json'}),
        verify=sirepo.job.cfg.verify_tls,
    )
    r.raise_for_status()
    return pkjson.load_any(r.content)
async def _fastcgi_read(self, connection):
    """Relay queued messages over `connection` and forward each reply.

    Each message taken from ``self._fastcgi_msg_q`` is written as one
    newline-terminated JSON line; the newline-terminated answer is passed
    to ``self.job_cmd_reply`` with ``job.OP_ANALYSIS``. The loop only ends
    through an exception.

    Args:
        connection: socket to wrap in a ``tornado.iostream.IOStream``
    """
    stream = None
    msg = None
    try:
        stream = tornado.iostream.IOStream(
            connection,
            max_buffer_size=job.cfg.max_message_bytes,
        )
        while True:
            msg = await self._fastcgi_msg_q.get()
            # Avoid issues with exceptions. We don't use q.join()
            # so not an issue to call before work is done.
            self._fastcgi_msg_q.task_done()
            await stream.write(pkjson.dump_bytes(msg) + b'\n')
            reply = await stream.read_until(b'\n', job.cfg.max_message_bytes)
            await self.job_cmd_reply(msg, job.OP_ANALYSIS, reply)
    except Exception as e:
        pkdlog('msg={} error={} stack={}', msg, e, pkdexc())
        # If self.fastcgi_cmd is None we initiated the kill so not an error
        if self.fastcgi_cmd:
            await self._fastcgi_handle_error(msg, e, pkdexc())
    finally:
        if stream:
            stream.close()
async def send(self, op):
    """Queue `op`, wait until it may be sent, then write it to the agent.

    Starts the agent first when there is neither a websocket nor an agent
    start in progress. An op flagged ``do_not_send`` after the wait is
    only logged, not written.

    Args:
        op: operation to send; ``op.msg`` is the payload
    """
#TODO(robnagler) need to send a retry to the ops, which should requeue
#  themselves at an outer level(?).
#  If a job is still running, but we just lost the websocket, want to
#  pickup where we left off. If the op already was written, then you
#  have to ask the agent. If ops are idempotent, we can simply
#  resend the request. If it is in process, then it will be reconnected
#  to the job. If it was already completed (and reply on the way), then
#  we can cache that state in the agent(?) and have it send the response
#  twice(?).
    self.ops_pending_send.append(op)
    if not (self.websocket or self._agent_starting):
        pkdlog('starting agentId={} uid={}', self._agentId, self.uid)
        await self._agent_start(op.msg)
    self.run_scheduler()
    await op.send_ready.wait()
    if op.do_not_send:
        pkdlog('op finished without being sent op={}', job.LogFormatter(op))
        return
    pkdlog(
        'op={} agentId={} opId={} runDir={}',
        op.opName,
        self._agentId,
        op.opId,
        op.msg.get('runDir')
    )
    op.start_timer()
    self.websocket.write_message(pkjson.dump_bytes(op.msg))
def _rpc(request):
    """Make one request/response round trip with the runner daemon.

    The request is written to the daemon's unix socket, the write side is
    shut down to mark the end of the request, and everything the daemon
    sends back until it closes the connection is the response.

    Args:
        request: the request, as a json-encodeable object

    Returns:
        response: the decoded server response

    Raises:
        AssertionError: the daemon closed the connection without replying
    """
    payload = pkjson.dump_bytes(request)
    chunks = []
    with contextlib.closing(socket.socket(socket.AF_UNIX)) as conn:
        conn.connect(str(srdb.runner_socket_path()))
        conn.sendall(payload)
        # EOF tells the other side the whole request has been sent
        conn.shutdown(socket.SHUT_WR)
        # recv() yields b'' once the daemon closes its end
        chunks.extend(iter(lambda: conn.recv(_CHUNK_SIZE), b''))
    reply = b''.join(chunks)
    if not reply:
        raise AssertionError('runner daemon had an unknown error')
    return pkjson.load_any(reply)
def send(self, op):
    """Log `op` and write its message to the websocket as JSON bytes.

    Args:
        op: operation whose ``msg`` is sent
    """
    pkdlog('{} {} runDir={}', self, op, op.msg.get('runDir'))
    payload = pkjson.dump_bytes(op.msg)
    self._websocket.write_message(payload)
def test_dump_bytes():
    """dump_bytes emits compact encoded JSON that load_any round-trips."""
    import json
    from pykern import pkjson
    from pykern.pkunit import pkeq

    v = ['a', 'b']
    # Strip spaces while the value is still text: bytes.replace() does not
    # accept str arguments, so replacing after encode() raises TypeError
    # on Python 3.
    expect = json.dumps(v).replace(' ', '').encode(pkjson.ENCODING)
    actual = pkjson.dump_bytes(v)
    pkeq(expect, actual)
    actual = pkjson.load_any(actual)
    pkeq(v, actual)
def _do_fastcgi(msg, template):
    """Serve job commands over a unix socket until the peer goes away.

    Connects to ``msg.fastcgiFile`` and then loops: read one
    newline-terminated JSON message, call the module-level
    ``_do_<jobCmd>`` function it names, and write the result back as one
    JSON line. A failing command is reported to the peer instead of ending
    the loop.

    Args:
        msg: object with a ``fastcgiFile`` attribute (path of the socket)
        template: not used; each request's template module is imported
            from that request's ``simulationType``

    Returns:
        None: the socket was closed by the peer or a falsy message arrived

    Raises:
        AssertionError: a command failed after ``_MAX_FASTCGI_EXCEPTIONS``
            failures in a row
    """
    import socket

    @contextlib.contextmanager
    def _update_run_dir_and_maybe_chdir(msg):
        # Only requests that carry a runDir are executed inside it.
        msg.runDir = pkio.py_path(msg.runDir) if msg.runDir else None
        with pkio.save_chdir(
            msg.runDir,
        ) if msg.runDir else contextlib.nullcontext():
            yield

    def _recv():
        m = b''
        while True:
            r = s.recv(_MAX_FASTCGI_MSG)
            if not r:
                pkdlog(
                    'job_cmd should be killed before socket is closed msg={}',
                    msg,
                )
                raise _AbruptSocketCloseError()
            if len(m) + len(r) > _MAX_FASTCGI_MSG:
                # format() so the limit appears in the message rather than
                # as a second exception argument
                raise RuntimeError(
                    'message larger than {} bytes'.format(_MAX_FASTCGI_MSG),
                )
            m += r
            # a trailing newline marks the end of one message
            if m[-1:] == b'\n':
                return pkjson.load_any(m)

    # The with statement closes the socket on every exit path.
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
        s.connect(msg.fastcgiFile)
        # count of failures since the last successful command
        c = 0
        while True:
            try:
                m = _recv()
                if not m:
                    return
                with _update_run_dir_and_maybe_chdir(m):
                    # NOTE: jobCmd comes from the received message, so any
                    # module-level _do_* function can be selected by the peer.
                    r = globals()['_do_' + m.jobCmd](
                        m,
                        sirepo.template.import_module(m.simulationType),
                    )
                r = PKDict(r).pksetdefault(state=job.COMPLETED)
                c = 0
            except _AbruptSocketCloseError:
                return
            except Exception as e:
                assert c < _MAX_FASTCGI_EXCEPTIONS, \
                    'too many fastgci exceptions {}. Most recent error={}'.format(c, e)
                c += 1
                r = _maybe_parse_user_alert(e)
            s.sendall(pkjson.dump_bytes(r) + b'\n')
async def post(self, uri, data, caller):
    """POST `data` as JSON to `uri` and return the parsed response.

    ``data.simulationType`` is set to ``self.sim_type`` before sending, and
    the request runs inside ``self._timer(uri, caller)``.

    Args:
        uri: passed through ``self._uri`` to get the request target
        data: request body; modified in place as described above
        caller: passed to ``self._timer`` along with the uri

    Returns:
        object: result of ``self.parse_response`` on the fetch result
    """
    data.simulationType = self.sim_type
    target = self._uri(uri)
    with self._timer(target, caller):
        reply = await self._client.fetch(
            target,
            body=pkjson.dump_bytes(data),
            headers=self._headers.pksetdefault('Content-type', 'application/json'),
            method='POST',
            connect_timeout=1e8,
            request_timeout=1e8,
        )
        return self.parse_response(reply)
def _do_fastcgi(msg, template):
    """Serve job commands over a unix socket until the peer goes away.

    Connects to ``msg.fastcgiFile`` and then loops: read one
    newline-terminated JSON message, change into its ``runDir``, call the
    module-level ``_do_<jobCmd>`` function it names, and write the result
    back as one JSON line. A failing command produces an error reply
    instead of ending the loop.

    Args:
        msg: object with a ``fastcgiFile`` attribute (path of the socket)
        template: not used; each request's template module is imported
            from that request's ``simulationType``

    Returns:
        None: the socket was closed by the peer or a falsy message arrived

    Raises:
        AssertionError: a command failed after ``_MAX_FASTCGI_EXCEPTIONS``
            failures in a row
    """
    import socket

    def _recv():
        m = b''
        while True:
            r = s.recv(_MAX_FASTCGI_MSG)
            if not r:
                pkdlog(
                    'job_cmd should be killed before socket is closed msg={}',
                    msg,
                )
                raise _AbruptSocketCloseError()
            if len(m) + len(r) > _MAX_FASTCGI_MSG:
                # format() so the limit appears in the message rather than
                # as a second exception argument
                raise RuntimeError(
                    'message larger than {} bytes'.format(_MAX_FASTCGI_MSG),
                )
            m += r
            # a trailing newline marks the end of one message
            if m[-1:] == b'\n':
                return pkjson.load_any(m)

    # The with statement closes the socket on every exit path.
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
        s.connect(msg.fastcgiFile)
        # count of failures since the last successful command
        c = 0
        while True:
            try:
                m = _recv()
                if not m:
                    return
                m.runDir = pkio.py_path(m.runDir)
                with pkio.save_chdir(m.runDir):
                    # NOTE: jobCmd comes from the received message, so any
                    # module-level _do_* function can be selected by the peer.
                    r = globals()['_do_' + m.jobCmd](
                        m,
                        sirepo.template.import_module(m.simulationType),
                    )
                r = PKDict(r).pksetdefault(state=job.COMPLETED)
                c = 0
            except _AbruptSocketCloseError:
                return
            except Exception as e:
                assert c < _MAX_FASTCGI_EXCEPTIONS, \
                    'too many fastgci exceptions {}. Most recent error={}'.format(c, e)
                c += 1
                # a UserAlert carries its own message in sr_args.error
                r = PKDict(
                    state=job.ERROR,
                    error=e.sr_args.error if isinstance(e, sirepo.util.UserAlert) else str(e),
                    stack=pkdexc(),
                )
            s.sendall(pkjson.dump_bytes(r) + b'\n')
def compute_job_hash(cls, data):
    """Return an MD5 hex digest of the inputs that affect the compute job.

    Only needs to be unique relative to the report, not globally unique,
    so MD5 is adequate. Long and cryptographic hashes make the cache
    checks slower.

    The digest covers the JSON encoding of every compute job field (in
    sorted order) followed by the modification times of the lib files.

    Args:
        data (dict): simulation data

    Returns:
        str: hex digest, or ``'HashIsUnused'`` when ``forceRun`` is set or
            the compute model is parallel
    """
    cls._assert_server_side()
    model = cls.compute_model(data)
    if data.get('forceRun') or cls.is_parallel(model):
        return 'HashIsUnused'
    models = data['models']
    digest = hashlib.md5()
    fields = sirepo.sim_data.get_class(
        data.simulationType
    )._compute_job_fields(data, data.report, model)
    # values may be string or PKDict
    fields.sort(key=str)
    for field in fields:
#TODO(pjm): work-around for now; non-string fields are hashed as is
        if not isinstance(field, pkconfig.STRING_TYPES):
            value = field
        else:
            # "model" or "model.field" path into data['models']
            parts = field.split('.')
            value = models[parts[0]]
            if len(parts) > 1:
                value = value[parts[1]]
        digest.update(
            pkjson.dump_bytes(value, sort_keys=True, allow_nan=False),
        )
    mtimes = [
        str(cls.lib_file_abspath(b, data=data).mtime())
        for b in sorted(cls.lib_file_basenames(data))
    ]
    digest.update(''.join(mtimes).encode())
    return digest.hexdigest()
async def _handle_conn(job_tracker, stream):
    """Read one request from `stream`, dispatch it, and send the response.

    The request is everything received until an empty chunk arrives. The
    handler is looked up in ``_RPC_HANDLERS`` by ``request.action`` and runs
    while holding ``job_tracker.locks[request.run_dir]``.

    Args:
        job_tracker: passed to the handler; supplies the per-run_dir locks
        stream: connection providing ``receive_some`` and ``send_all``
    """
    # NOTE(review): presumably failures are logged rather than propagated;
    # confirm against _catch_and_log_errors.
    with _catch_and_log_errors(Exception, 'error handling request'):
        raw = bytearray()
        chunk = await stream.receive_some(_CHUNK_SIZE)
        while chunk:
            raw += chunk
            chunk = await stream.receive_some(_CHUNK_SIZE)
        request = pkjson.load_any(raw)
        if 'run_dir' in request:
            request.run_dir = pkio.py_path(request.run_dir)
        pkdc('runner request: {!r}', request)
        handler = _RPC_HANDLERS[request.action]
        async with job_tracker.locks[request.run_dir]:
            response = await handler(job_tracker, request)
        pkdc('runner response: {!r}', response)
        await stream.send_all(pkjson.dump_bytes(response))
def format_op(self, msg, opName, **kwargs):
    """Build the JSON bytes for an op sent by this agent.

    Args:
        msg: message being answered; when truthy its ``opId`` is copied
            into the result
        opName: name of the operation
        kwargs: additional values; applied as defaults, so they do not
            replace ``agentId`` or ``opName``

    Returns:
        bytes: JSON encoding of the op
    """
    if msg:
        kwargs['opId'] = msg.get('opId')
    op = PKDict(agentId=cfg.agent_id, opName=opName).pksetdefault(**kwargs)
    return pkjson.dump_bytes(op)