async def _make_local_thread() -> Thread:
    """Build the Thread object that represents the process we are running in.

    The local thread behaves like any other thread once constructed; what
    differs is the bootstrap: its Task is built on LocalSyscall and its RAM on
    LocalMemoryTransport, and its epoller is a subsidiary one that waits for
    readability of the epoll fd through trio.

    """
    process = near.Process(os.getpid())

    # Assemble the task piece by piece; everything is keyed on our own process id.
    syscall_interface = LocalSyscall()
    fd_table = far.FDTable(process.id)
    address_space = far.AddressSpace(process.id)
    pid_namespace = far.PidNamespace(process.id)
    task = Task(syscall_interface, process, fd_table, address_space, pid_namespace)

    transport = LocalMemoryTransport(task)
    allocator = memory.AllocatorClient.make_allocator(task)
    ram = RAM(task, transport, allocator)

    epfd = await task.epoll_create()

    async def block_until_epfd_readable():
        # Callback given to the subsidiary epoller: log, then wait on the epoll fd via trio.
        logger.debug("wait_readable(%s)", epfd.near.number)
        await trio.hazmat.wait_readable(epfd.near.number)

    epoller = Epoller.make_subsidiary(ram, epfd, block_until_epfd_readable)

    # Build the remaining Thread components in the order they are passed.
    connection = await FDPassConnection.make(task, ram, epoller)
    loader = NativeLoader.make_from_symbols(task, lib)
    child_monitor = await ChildProcessMonitor.make(ram, task, epoller)
    # The Environment is constructed from a bytes-to-bytes copy of os.environ.
    encoded_environ = {
        name.encode(): val.encode()
        for name, val in os.environ.items()
    }
    environment = Environment(task, ram, encoded_environ)
    # Handles for the three standard fds (0, 1, 2).
    stdin_handle = task.make_fd_handle(near.FileDescriptor(0))
    stdout_handle = task.make_fd_handle(near.FileDescriptor(1))
    stderr_handle = task.make_fd_handle(near.FileDescriptor(2))

    return Thread(
        task,
        ram,
        connection,
        loader,
        epoller,
        child_monitor,
        environment,
        stdin=stdin_handle,
        stdout=stdout_handle,
        stderr=stderr_handle,
    )
async def _make_local_process() -> Process:
    """Create the local process, allocating various resources locally.

    For the most part, the local process is like any other process; it just
    bootstraps differently, and uses syscall and memory interfaces which are
    specialized to the local process.

    Returns:
        A Process wrapping a Task for os.getpid(), with a subsidiary epoller,
        an fd-passing connection, a native loader, a child monitor, an
        environment built from os.environ, and handles for fds 0, 1 and 2.

    """
    pid = near.Pid(os.getpid())
    task = Task(
        pid,
        handle.FDTable(pid.id),
        far.AddressSpace(pid.id),
        far.PidNamespace(pid.id),
        far.MountNamespace(pid.id),
    )
    # The syscall interface and allocator both need the task, so they are
    # attached after the task has been constructed.
    task.sysif = LocalSyscall(task)
    task.allocator = await memory.AllocatorClient.make_allocator(task)
    epfd = await task.epoll_create()
    # The epoller waits on the epoll fd through a TrioSystemWaitReadable; the
    # same object is also handed to set_trio_system_wait_readable.
    # (A nested `wait_readable` coroutine used to be defined here but was never
    # referenced, so it has been removed.)
    trio_system_wait_readable = TrioSystemWaitReadable(epfd.near.number)
    set_trio_system_wait_readable(trio_system_wait_readable)
    epoller = Epoller.make_subsidiary(epfd, trio_system_wait_readable.wait)
    process = Process(
        task,
        await FDPassConnection.make(task, epoller),
        NativeLoader.make_from_symbols(task, lib),
        epoller,
        await ChildPidMonitor.make(task, epoller),
        # pass a plain-dict snapshot of the current environment
        Environment.make_from_environ(task, {**os.environ}),
        stdin=task.make_fd_handle(near.FileDescriptor(0)),
        stdout=task.make_fd_handle(near.FileDescriptor(1)),
        stderr=task.make_fd_handle(near.FileDescriptor(2)),
    )
    return process
async def stdin_bootstrap(
        parent: Thread,
        bootstrap_command: Command,
) -> t.Tuple[AsyncChildProcess, Thread]:
    """Run a command that execs rsyscall-stdin-bootstrap and build a Thread from it.

    bootstrap_command may be any command, as long as it eventually execs
    rsyscall-stdin-bootstrap and passes its stdin down when it does.

    We fork from `parent`, replace the child's stdin with one end of a
    socketpair, and exec bootstrap_command. Over the other end we send the file
    descriptors the new process needs, then read back its self-description and
    assemble a new thread from it.

    Returns the exec'd child process together with the new thread.

    """
    #### fork and exec into the bootstrap command
    forked = await parent.fork()
    # the socketpair whose second end becomes the child's stdin
    pair_buf = await parent.ram.malloc(Socketpair)
    written_pair = await parent.task.socketpair(AF.UNIX, SOCK.STREAM, 0, pair_buf)
    stdin_pair = await written_pair.read()
    parent_sock = stdin_pair.first
    child_sock = stdin_pair.second.move(forked.task)
    # install the socket as the child's stdin
    await forked.unshare_files(going_to_exec=True)
    await forked.stdin.replace_with(child_sock)
    # exec
    child_process = await forked.exec(bootstrap_command)

    #### set up all the fds we'll want to pass over
    # the basic connections: one channel for syscalls, one for data
    syscall_channel, data_channel = await parent.open_async_channels(2)
    access_syscall_sock, passed_syscall_sock = syscall_channel
    access_data_sock, passed_data_sock = data_channel
    # memfd for setting up the futex
    futex_name = await parent.ram.ptr(Path("child_robust_futex_list"))
    futex_memfd = await parent.task.memfd_create(futex_name)
    # prep_fd_transfer gives us an fd to send, plus the factory used further
    # down to build the new thread's connection
    connection_fd, make_connection = await parent.connection.prep_fd_transfer()

    async def build_msghdr(sem: RAM) -> WrittenPointer[SendMsghdr]:
        # a one-byte buffer as the message body; the fds go in the SCM_RIGHTS cmsg
        one_byte = await sem.malloc(bytes, 1)
        iovec = await sem.ptr(IovecList([one_byte]))
        rights = CmsgSCMRights([
            passed_syscall_sock, passed_data_sock, futex_memfd, connection_fd,
        ])
        cmsgs = await sem.ptr(CmsgList([rights]))
        return await sem.ptr(SendMsghdr(None, iovec, cmsgs))

    # send the fds to the new process
    msghdr = await parent.ram.perform_batch(build_msghdr)
    _, [] = await parent_sock.sendmsg(msghdr, SendmsgFlags.NONE)
    # close our reference to fds that only the new process needs
    await passed_syscall_sock.close()
    await passed_data_sock.close()
    # close the socketpair
    await parent_sock.close()

    #### read describe to get all the information we need from the new process
    describe_buf = AsyncReadBuffer(access_data_sock)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_stdin_bootstrap')
    env_vars = await describe_buf.read_envp(describe_struct.envp_count)

    #### build the new task
    pid_number = describe_struct.pid
    fd_table = far.FDTable(pid_number)
    address_space = far.AddressSpace(pid_number)
    # we assume pid namespace is shared
    # TODO include namespace inode numbers numbers in describe
    # note: if we start dealing with namespace numbers then we need to
    # have a Kernel namespace which tells us which kernel we get those
    # numbers from.
    # oh hey we can conveniently dump the inode numbers with getdents!
    pidns = parent.task.pidns
    process = near.Process(pid_number)
    remote_syscall_fd = near.FileDescriptor(describe_struct.syscall_fd)
    # the same local socket is passed for both arguments of the syscall connection
    syscall_connection = SyscallConnection(access_syscall_sock, access_syscall_sock)
    syscall = NonChildSyscallInterface(syscall_connection, process)
    base_task = Task(syscall, process, fd_table, address_space, pidns)
    # the remote-side handle can only be made once the task exists
    handle_remote_syscall_fd = base_task.make_fd_handle(remote_syscall_fd)
    syscall.store_remote_side_handles(handle_remote_syscall_fd, handle_remote_syscall_fd)
    allocator = memory.AllocatorClient.make_allocator(base_task)
    # we assume our SignalMask is zero'd before being started, so we don't inherit it
    remote_data_fd = base_task.make_fd_handle(near.FileDescriptor(describe_struct.data_fd))
    transport = SocketMemoryTransport(access_data_sock, remote_data_fd, allocator)
    ram = RAM(base_task, transport, allocator)
    # TODO I think I can maybe elide creating this epollcenter and instead inherit it or share it, maybe?
    epoller = await Epoller.make_root(ram, base_task)
    child_monitor = await ChildProcessMonitor.make(ram, base_task, epoller)
    connecting_fd = base_task.make_fd_handle(near.FileDescriptor(describe_struct.connecting_fd))
    connection = make_connection(base_task, ram, connecting_fd)

    loader = NativeLoader.make_from_symbols(base_task, describe_struct.symbols)
    environment = Environment(base_task, ram, env_vars)
    stdin_handle = base_task.make_fd_handle(near.FileDescriptor(0))
    stdout_handle = base_task.make_fd_handle(near.FileDescriptor(1))
    stderr_handle = base_task.make_fd_handle(near.FileDescriptor(2))
    new_parent = Thread(
        task=base_task,
        ram=ram,
        connection=connection,
        loader=loader,
        epoller=epoller,
        child_monitor=child_monitor,
        environ=environment,
        stdin=stdin_handle,
        stdout=stdout_handle,
        stderr=stderr_handle,
    )
    #### TODO set up futex I guess
    remote_futex_memfd = near.FileDescriptor(describe_struct.futex_memfd)
    return child_process, new_parent
async def ssh_bootstrap(
        parent: Process,
        # the actual ssh command to run
        ssh_command: SSHCommand,
        # the local path we'll use for the socket
        local_socket_path: Path,
        # the directory we're bootstrapping out of
        tmp_path_bytes: bytes,
) -> t.Tuple[AsyncChildPid, Process]:
    """Over ssh, run the bootstrap executable and build a Process connected to it.

    Steps, in order:

    1. Start an ssh forward (via ssh_forward) between local_socket_path and
       "<tmp_path_bytes>/data".
    2. Fork from `parent` and exec ssh so that it runs `./bootstrap rsyscall`
       from within tmp_path_bytes.
    3. Connect to local_socket_path twice: the first connection carries
       syscalls, the second carries data.
    4. Read the 'struct rsyscall_bootstrap' description and the environment
       off the data connection, and assemble the new Task/RAM/Process from it.

    Returns:
        The child pid of the ssh process running the bootstrap, and the new
        Process.

    """
    # identify local path
    local_data_addr = await parent.ram.ptr(
        await SockaddrUn.from_path(parent, local_socket_path))
    # start port forwarding; we'll just leak this process, no big deal
    # TODO we shouldn't leak processes; we should be GCing processes at some point
    forward_child_pid = await ssh_forward(
        parent, ssh_command, local_socket_path, (tmp_path_bytes + b"/data").decode())
    # start bootstrap
    bootstrap_process = await parent.fork()
    bootstrap_child_pid = await bootstrap_process.exec(ssh_command.args(
        "-n", f"cd {tmp_path_bytes.decode()}; exec ./bootstrap rsyscall"
    ))
    # TODO should unlink the bootstrap after I'm done execing.
    # it would be better if sh supported fexecve, then I could unlink it before I exec...

    # Connect to local socket twice: once for syscalls, once for data
    async def make_async_connection() -> AsyncFileDescriptor:
        sock = await parent.make_afd(await parent.socket(AF.UNIX, SOCK.STREAM|SOCK.NONBLOCK))
        await sock.connect(local_data_addr)
        return sock
    async_local_syscall_sock = await make_async_connection()
    async_local_data_sock = await make_async_connection()

    # Read description off of the data sock
    describe_buf = AsyncReadBuffer(async_local_data_sock)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_bootstrap')
    # Keep the raw pid number under its own name: the far.* and handle.FDTable
    # constructors below are given the number, while Task and the logger are
    # given the near.Pid wrapper.
    pid_number = describe_struct.pid
    environ = await describe_buf.read_envp(describe_struct.envp_count)

    # Build the new task!
    new_address_space = far.AddressSpace(pid_number)
    # TODO the pid namespace will probably be common for all connections...
    # TODO we should get this from the SSHHost, this is usually going
    # to be common for all connections and we should express that
    new_pid_namespace = far.PidNamespace(pid_number)
    new_pid = near.Pid(pid_number)
    new_base_task = Task(
        # The FD table is keyed on the pid number, not the near.Pid object;
        # previously the rebound near.Pid was passed here by mistake.
        new_pid, handle.FDTable(pid_number), new_address_space,
        new_pid_namespace,
    )
    handle_remote_syscall_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.syscall_sock))
    # The same local socket and the same remote handle are passed for both
    # socket/handle pairs of the syscall connection.
    new_base_task.sysif = SyscallConnection(
        logger.getChild(str(new_pid)),
        async_local_syscall_sock, async_local_syscall_sock,
        handle_remote_syscall_fd, handle_remote_syscall_fd,
    )
    handle_remote_data_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.data_sock))
    handle_listening_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.listening_sock))
    new_allocator = memory.AllocatorClient.make_allocator(new_base_task)
    new_transport = SocketMemoryTransport(async_local_data_sock, handle_remote_data_fd)
    # we don't inherit SignalMask; we assume ssh zeroes the sigmask before starting us
    new_ram = RAM(new_base_task, new_transport, new_allocator)
    epoller = await Epoller.make_root(new_ram, new_base_task)
    child_monitor = await ChildPidMonitor.make(new_ram, new_base_task, epoller)
    # the listening socket is set non-blocking before being wrapped as an async fd
    await handle_listening_fd.fcntl(F.SETFL, O.NONBLOCK)
    connection = ListeningConnection(
        parent.task, parent.ram, parent.epoller,
        local_data_addr,
        new_base_task, new_ram,
        await AsyncFileDescriptor.make(epoller, new_ram, handle_listening_fd),
    )
    new_process = Process(
        task=new_base_task,
        ram=new_ram,
        connection=connection,
        loader=NativeLoader.make_from_symbols(new_base_task, describe_struct.symbols),
        epoller=epoller,
        child_monitor=child_monitor,
        environ=Environment.make_from_environ(new_base_task, new_ram, environ),
        stdin=new_base_task.make_fd_handle(near.FileDescriptor(0)),
        stdout=new_base_task.make_fd_handle(near.FileDescriptor(1)),
        stderr=new_base_task.make_fd_handle(near.FileDescriptor(2)),
    )
    return bootstrap_child_pid, new_process
async def _setup_stub(
        thread: Thread,
        bootstrap_sock: FileDescriptor,
) -> t.Tuple[t.List[str], Thread]:
    """Set up a stub thread over an already-connected bootstrap socket.

    Sends the new process its file descriptors over bootstrap_sock, reads back
    the 'struct rsyscall_unix_stub' description (including argv and the
    environment), and assembles a Thread for the stub process.

    Returns the stub's decoded argv together with the new thread.

    """
    # one channel for syscalls, one for data
    syscall_channel, data_channel = await thread.open_async_channels(2)
    access_syscall_sock, passed_syscall_sock = syscall_channel
    access_data_sock, passed_data_sock = data_channel
    # memfd for setting up the futex
    futex_name = await thread.ram.ptr(Path("child_robust_futex_list"))
    futex_memfd = await thread.task.memfd_create(futex_name)
    # prep_fd_transfer gives us an fd to send, plus the factory used further
    # down to build the new thread's connection
    connection_fd, make_connection = await thread.connection.prep_fd_transfer()

    async def build_msghdr(sem: RAM) -> WrittenPointer[SendMsghdr]:
        # a one-byte buffer as the message body; the fds go in the SCM_RIGHTS cmsg
        one_byte = await sem.malloc(bytes, 1)
        iovec = await sem.ptr(IovecList([one_byte]))
        rights = CmsgSCMRights([
            passed_syscall_sock, passed_data_sock, futex_memfd, connection_fd,
        ])
        cmsgs = await sem.ptr(CmsgList([rights]))
        return await sem.ptr(SendMsghdr(None, iovec, cmsgs))

    # send the fds to the new process
    msghdr = await thread.ram.perform_batch(build_msghdr)
    _, [] = await bootstrap_sock.sendmsg(msghdr, SendmsgFlags.NONE)
    # close our reference to fds that only the new process needs
    await passed_syscall_sock.invalidate()
    await passed_data_sock.invalidate()
    # close the socketpair
    await bootstrap_sock.invalidate()

    #### read describe to get all the information we need from the new process
    describe_buf = AsyncReadBuffer(access_data_sock)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_unix_stub')
    raw_args = await describe_buf.read_length_prefixed_array(describe_struct.argc)
    argv = list(map(os.fsdecode, raw_args))
    env_vars = await describe_buf.read_envp(describe_struct.envp_count)

    #### build the new task
    pid_number = describe_struct.pid
    fd_table = handle.FDTable(pid_number)
    address_space = far.AddressSpace(pid_number)
    # we assume pid namespace is shared
    pidns = thread.task.pidns
    process = near.Process(pid_number)
    # we assume net namespace is shared - that's dubious...
    # we should make it possible to control the namespace sharing more, hmm.
    # TODO maybe the describe should contain the net namespace number? and we can store our own as well?
    # then we can automatically do it right
    base_task = Task(process, fd_table, address_space, pidns)
    remote_syscall_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.syscall_fd))
    # the same local socket and the same remote handle are used for both pairs
    base_task.sysif = SyscallConnection(
        logger.getChild(str(process)),
        access_syscall_sock,
        access_syscall_sock,
        remote_syscall_fd,
        remote_syscall_fd,
    )
    allocator = memory.AllocatorClient.make_allocator(base_task)
    # the stub's signal mask is taken from the bits reported in the description
    blocked_signals = {
        SIG(bit) for bit in rsyscall.struct.bits(describe_struct.sigmask)
    }
    base_task.sigmask = Sigset(blocked_signals)
    remote_data_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.data_fd))
    transport = SocketMemoryTransport(access_data_sock, remote_data_fd)
    ram = RAM(base_task, transport, allocator)
    # TODO I think I can maybe elide creating this epollcenter and instead inherit it or share it, maybe?
    # I guess I need to write out the set too in describe
    epoller = await Epoller.make_root(ram, base_task)
    child_monitor = await ChildProcessMonitor.make(ram, base_task, epoller)
    connecting_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.connecting_fd))
    connection = make_connection(base_task, ram, connecting_fd)

    loader = NativeLoader.make_from_symbols(base_task, describe_struct.symbols)
    environment = Environment.make_from_environ(base_task, ram, env_vars)
    stdin_handle = base_task.make_fd_handle(near.FileDescriptor(0))
    stdout_handle = base_task.make_fd_handle(near.FileDescriptor(1))
    stderr_handle = base_task.make_fd_handle(near.FileDescriptor(2))
    new_thread = Thread(
        task=base_task,
        ram=ram,
        connection=connection,
        loader=loader,
        epoller=epoller,
        child_monitor=child_monitor,
        environ=environment,
        stdin=stdin_handle,
        stdout=stdout_handle,
        stderr=stderr_handle,
    )
    #### TODO set up futex I guess
    remote_futex_memfd = near.FileDescriptor(describe_struct.futex_memfd)
    return argv, new_thread
async def stdin_bootstrap(
        parent: Process,
        bootstrap_command: Command,
) -> t.Tuple[AsyncChildPid, Process]:
    """Run a command that execs rsyscall-stdin-bootstrap and build a Process from it.

    bootstrap_command may be any command, as long as it eventually execs
    rsyscall-stdin-bootstrap and passes its stdin down when it does.

    We create a socketpair, fork from `parent`, dup one end of the pair onto
    the child's stdin, and exec bootstrap_command. Over the other end we send
    the file descriptors the new process needs, then read back its
    self-description and assemble a new process object from it.

    Returns the exec'd child's pid object together with the new process.

    """
    #### clone and exec into the bootstrap command
    # the socketpair whose second end becomes the child's stdin
    pair_buf = await parent.task.malloc(Socketpair)
    written_pair = await parent.task.socketpair(AF.UNIX, SOCK.STREAM, 0, pair_buf)
    stdin_pair = await written_pair.read()
    parent_sock = stdin_pair.first
    child = await parent.fork()
    # install the socket as the child's stdin, then close the second end on the parent's handle
    inherited_sock = child.task.inherit_fd(stdin_pair.second)
    await inherited_sock.dup2(child.stdin)
    await stdin_pair.second.close()
    # exec
    child_pid = await child.exec(bootstrap_command)

    #### set up all the fds we'll want to pass over
    # the basic connections: one channel for syscalls, one for data
    syscall_channel, data_channel = await parent.open_async_channels(2)
    access_syscall_sock, passed_syscall_sock = syscall_channel
    access_data_sock, passed_data_sock = data_channel
    # prep_fd_transfer gives us an fd to send, plus the factory used further
    # down to build the new process's connection
    connection_fd, make_connection = await parent.connection.prep_fd_transfer()
    # a one-byte buffer as the message body; the fds go in the SCM_RIGHTS cmsg
    one_byte = await parent.malloc(bytes, 1)
    iovec = await parent.ptr(IovecList([one_byte]))
    rights = CmsgSCMRights([passed_syscall_sock, passed_data_sock, connection_fd])
    cmsgs = await parent.ptr(CmsgList([rights]))
    # send the fds to the new process
    msghdr = await parent.ptr(SendMsghdr(None, iovec, cmsgs))
    _, [] = await parent_sock.sendmsg(msghdr, SendmsgFlags.NONE)
    # close our reference to fds that only the new process needs
    await passed_syscall_sock.close()
    await passed_data_sock.close()
    # close the socketpair
    await parent_sock.close()

    #### read describe to get all the information we need from the new process
    describe_buf = AsyncReadBuffer(access_data_sock)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_stdin_bootstrap')
    env_vars = await describe_buf.read_envp(describe_struct.envp_count)

    #### build the new task
    pid_number = describe_struct.pid
    fd_table = handle.FDTable(pid_number)
    address_space = far.AddressSpace(pid_number)
    # we assume pid namespace is shared
    # TODO include namespace inode numbers numbers in describe
    # note: if we start dealing with namespace numbers then we need to
    # have a Kernel namespace which tells us which kernel we get those
    # numbers from.
    # oh hey we can conveniently dump the inode numbers with getdents!
    pidns = parent.task.pidns
    # we assume mount namespace is not shared (can't hurt)
    mountns = far.MountNamespace(pid_number)
    pid = near.Pid(pid_number)
    base_task = Task(pid, fd_table, address_space, pidns, mountns)
    remote_syscall_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.syscall_fd))
    # the syscall interface and allocator are attached once the task exists
    base_task.sysif = SyscallConnection(
        logger.getChild(str(pid)),
        access_syscall_sock,
        remote_syscall_fd,
    )
    base_task.allocator = await memory.AllocatorClient.make_allocator(base_task)
    # we assume our SignalMask is zero'd before being started, so we don't inherit it
    # TODO I think I can maybe elide creating this epollcenter and instead inherit it or share it, maybe?
    epoller = await Epoller.make_root(base_task)
    child_monitor = await ChildPidMonitor.make(base_task, epoller)
    connecting_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.connecting_fd))
    connection = make_connection(base_task, connecting_fd)

    loader = NativeLoader.make_from_symbols(base_task, describe_struct.symbols)
    environment = Environment.make_from_environ(base_task, env_vars)
    stdin_handle = base_task.make_fd_handle(near.FileDescriptor(0))
    stdout_handle = base_task.make_fd_handle(near.FileDescriptor(1))
    stderr_handle = base_task.make_fd_handle(near.FileDescriptor(2))
    new_parent = Process(
        task=base_task,
        connection=connection,
        loader=loader,
        epoller=epoller,
        child_monitor=child_monitor,
        environ=environment,
        stdin=stdin_handle,
        stdout=stdout_handle,
        stderr=stderr_handle,
    )
    return child_pid, new_parent