def s3scrape(bucket_pool, srcurl, doit=False, do_delete=False, do_upload=False):
  ''' Sync website to S3 directory tree.
  '''
  global UPD
  ok = True
  L = Later(4, name="s3scrape(%r, %r)" % (bucket_pool.bucket_name, srcurl))
  with L:
    if do_upload:
      Q = IterableQueue()

      def dispatch():
        for LF in s3scrape_async(L, bucket_pool, srcurl, doit=doit,
                                 do_delete=do_delete):
          Q.put(LF)
        Q.close()

      Thread(target=dispatch).start()
      for LF in Q:
        diff, ctype, srcU, dstpath, e, error_msg = LF()
        with Pfx(srcU):
          if e:
            error(error_msg)
            ok = False
          else:
            line = "%s %-25s %s" % (diff.summary(), ctype, dstpath)
            if diff.unchanged:
              UPD.out(line)
              ##UPD.nl(line)
            else:
              if diff.changed_fields() == ['time']:
                # be quiet about time changes
                UPD.out(line)
              else:
                UPD.nl(line)
                ##UPD.nl(" %r", diff.metadata)
    if do_delete:
      # now process deletions
      with bucket_pool.instance() as B:
        ##if dstdir:
        ##  dstdir_prefix = dstdir + RSEP
        ##else:
        ##  dstdir_prefix = ''
        dstdir_prefix = RSEP
        with Pfx("S3.filter(Prefix=%r)", dstdir_prefix):
          dstdelpaths = []
          for s3obj in B.objects.filter(Prefix=dstdir_prefix):
            dstpath = s3obj.key
            with Pfx(dstpath):
              if not dstpath.startswith(dstdir_prefix):
                error("unexpected dstpath, not in subdir")
                continue
              dstrpath = dstpath[len(dstdir_prefix):]
              if dstrpath.startswith(RSEP):
                error("unexpected dstpath, extra %r", RSEP)
                continue
              raise RuntimeError("DELETION UNIMPLEMENTED")
              srcpath = joinpath(srcdir, s32path(dstrpath, unpercent))
              if os.path.exists(srcpath):
                ##info("src exists, not deleting (src=%r)", srcpath)
                continue
              ## uncomment if new %hh omissions surface
              ##UPD.nl("MISSING local %r", srcpath)
              if dstrpath.endswith(RSEP):
                # a folder
                UPD.nl("d DEL %s", dstpath)
              else:
                UPD.nl("* DEL %s", dstpath)
              dstdelpaths.append(dstpath)
          if dstdelpaths:
            dstdelpaths = sorted(dstdelpaths, reverse=True)
            while dstdelpaths:
              delpaths = dstdelpaths[:S3_MAX_DELETE_OBJECTS]
              if doit:
                result = B.delete_objects(
                    Delete={
                        'Objects': [{'Key': dstpath} for dstpath in delpaths]
                    })
                errs = result.get('Errors')
                if errs:
                  ok = False
                  for err in errs:
                    error("delete: %s: %r", err['Message'], err['Key'])
              dstdelpaths[:len(delpaths)] = []
  L.wait()
  return ok
class TestLater(unittest.TestCase): ''' Unit tests for the Later class. ''' @staticmethod def _f(x): return x * 2 @staticmethod def _delay(n): sleep(n) return n class _Bang(Exception): pass @staticmethod def _bang(): raise TestLater._Bang() def setUp(self): ''' Set up a Later, log to the terminal. ''' self.L = Later(2) self.L.open() self.L.logTo("/dev/tty") def tearDown(self): ''' Close the Later. ''' self.L.close() def test00one(self): ''' Compute 3*2. ''' L = self.L F = partial(self._f, 3) LF = L.defer(F) x = LF() self.assertEqual(x, 6) def test01two(self): ''' Run two sleep(2) in parallel. ''' L = self.L F = partial(self._delay, 2) LF1 = L.defer(F) LF2 = L.defer(F) now = time.time() LF1() LF2() again = time.time() elapsed = again - now self.assertTrue( elapsed < 3, "elapsed (%s) >= 3, now = %s, again = %s" % (elapsed, now, again)) def test02three(self): ''' Three sleep(2), two in parallel, one delayed. ''' L = self.L F = partial(self._delay, 2) LF1 = L.defer(F) LF2 = L.defer(F) LF3 = L.defer(F) now = time.time() LF1() LF2() LF3() elapsed = time.time() - now self.assertTrue(elapsed >= 4, "elapsed (%s) < 4" % (elapsed, )) def test03calltwice(self): ''' Run a LateFunction once, get results twice. ''' L = self.L F = partial(self._f, 5) LF = L.defer(F) x = LF() self.assertEqual(x, 10) y = LF() self.assertEqual(y, 10) def test04raise(self): ''' A LateFunction which raises an exception. ''' LF = self.L.defer(self._bang) self.assertRaises(TestLater._Bang, LF) def test05raiseTwice(self): ''' A LateFunction which raises an exception, called twice. ''' LF = self.L.defer(self._bang) self.assertRaises(TestLater._Bang, LF) self.assertRaises(TestLater._Bang, LF) def test06defer_with_args(self): ''' Compute 7*2 using .defer_with_args(). ''' LF = self.L.defer(self._f, 7) x = LF() self.assertEqual(x, 14) def test07report(self): ''' Report LateFunctions in order of completion. ''' with Later(3) as L3: LF1 = L3.defer(self._delay, 3) LF2 = L3.defer(self._delay, 2) LF3 = L3.defer(self._delay, 1) results = [LF() for LF in report((LF1, LF2, LF3))] self.assertEqual(results, [1, 2, 3])
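# A minimal non-test sketch of the Later pattern the TestLater cases above
# exercise, assuming `Later` and `report` come from cs.later as they do in
# those tests: defer() returns a LateFunction, calling it collects the result
# (or re-raises the function's exception), and report() yields LateFunctions
# in completion order rather than submission order.
from cs.later import Later, report

def square(n):
  # stand-in work function for illustration only
  return n * n

with Later(4, name="demo") as L:
  LFs = [L.defer(square, n) for n in range(8)]
  for LF in report(LFs):
    # results arrive in completion order, not submission order
    print(LF())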
class FileSystem: ''' The core filesystem functionality supporting FUSE operations and in principle other filesystem-like access. See the `cs.vt.fuse` module for the `StoreFS_LLFUSE` class (aliased as `StoreFS`) and associated mount function which presents a `FileSystem` as a FUSE mount. TODO: medium term: see if this can be made into a VFS layer to support non-FUSE operation, for example a VT FTP client or the like. ''' def __init__( self, E, *, S=None, archive=None, subpath=None, readonly=None, append_only=False, show_prev_dirent=False, thread_max=None, ): ''' Initialise a new mountpoint. Parameters: * `E`: the root directory reference * `S`: the backing Store * `archive`: if not None, an Archive or similar, with a `.update(Dirent[,when])` method * `subpath`: relative path to mount Dir * `readonly`: forbid data modification * `append_only`: append only mode: files may only grow, filenames may not be changed or deleted * `show_prev_dirent`: show previous Dir revision as the '...' entry ''' if not E.isdir: raise ValueError("not dir Dir: %s" % (E,)) if S is None: S = defaults.S self._old_S_block_cache = S.block_cache self.block_cache = S.block_cache or defaults.block_cache or BlockCache() S.block_cache = self.block_cache S.open() if readonly is None: readonly = S.readonly if thread_max is None: thread_max = DEFAULT_FS_THREAD_MAX self.E = E self.S = S self.archive = archive if archive is None: self._last_sync_state = None else: self._last_sync_state = bytes(E) self.subpath = subpath self.readonly = readonly self.append_only = append_only self.show_prev_dirent = show_prev_dirent if subpath: # locate subdirectory to display at mountpoint mntE, _, tail_path = resolve(E, subpath) if tail_path: raise ValueError("subpath %r does not resolve" % (subpath,)) if not mntE.isdir: raise ValueError("subpath %r is not a directory" % (subpath,)) self.mntE = mntE else: mntE = E self.mntE = mntE self.is_darwin = os.uname().sysname == 'Darwin' self.device_id = -1 self._fs_uid = os.geteuid() self._fs_gid = os.getegid() self._lock = RLock() self._later = Later(DEFAULT_FS_THREAD_MAX) self._later.open() self._path_files = {} self._file_handles = [] inodes = self._inodes = Inodes(self) self[1] = mntE try: with Pfx("fs_inode_dirents"): fs_inode_dirents = E.meta.get("fs_inode_dirents") X("FS INIT: fs_inode_dirents=%s", fs_inode_dirents) if fs_inode_dirents: inode_dir, offset = _Dirent.from_str(fs_inode_dirents) if offset < len(fs_inode_dirents): warning( "unparsed text after Dirent: %r", fs_inode_dirents[offset:] ) X("IMPORT INODES:") dump_Dirent(inode_dir) inodes.load_fs_inode_dirents(inode_dir) else: X("NO INODE IMPORT") X("FileSystem mntE:") with self.S: with stackattrs(defaults, fs=self): dump_Dirent(mntE) except Exception as e: exception("exception during initial report: %s", e) def bg(self, func, *a, **kw): ''' Dispatch a function via the FileSystem's Later instance. ''' return self._later.defer(func, *a, **kw) def close(self): ''' Close the FileSystem. ''' self._sync() self.S.close() self.S.block_cache = self._old_S_block_cache def __str__(self): if self.subpath: return "<%s S=%s /=%s %r=%s>" % ( self.__class__.__name__, self.S, self.E, self.subpath, self.mntE ) return "%s(S=%s,/=%r)" % (type(self).__name__, self.S, self.E) def __getitem__(self, inum): ''' Lookup inode numbers or UUIDs. ''' return self._inodes[inum] def __setitem__(self, inum, E): ''' Associate a specific inode number with a Dirent. 
''' self._inodes.add(E, inum) @logexc def _sync(self): with Pfx("_sync"): if defaults.S is None: raise RuntimeError("RUNTIME: defaults.S is None!") archive = self.archive if not self.readonly and archive is not None: with self._lock: E = self.E updated = False X("snapshot %r ...", E) E.snapshot() X("snapshot: afterwards E=%r", E) fs_inode_dirents = self._inodes.get_fs_inode_dirents() X("_SYNC: FS_INODE_DIRENTS:") dump_Dirent(fs_inode_dirents) X("set meta.fs_inode_dirents") if fs_inode_dirents.size > 0: E.meta['fs_inode_dirents'] = str(fs_inode_dirents) else: E.meta['fs_inode_dirents'] = '' new_state = bytes(E) if new_state != self._last_sync_state: archive.update(E) self._last_sync_state = new_state updated = True # debugging if updated: dump_Dirent(E, recurse=False) def _resolve(self, path): ''' Call paths.resolve and return its result. ''' return resolve(self.mntE, path) def _namei2(self, path): ''' Look up path. Raise OSError(ENOENT) if missing. Return Dirent, parent. ''' E, P, tail_path = self._resolve(path) if tail_path: OS_ENOENT("cannot resolve path %r", path) return E, P def _namei(self, path): ''' Look up path. Raise OSError(ENOENT) if missing. Return Dirent. ''' E, _ = self._namei2(path) return E @locked def E2inode(self, E): ''' Return the Inode for the supplied Dirent `E`. ''' if E.isindirect: E = E.ref return self._inodes.add(E) def i2E(self, inum): ''' Return the Dirent associated with the supplied `inum`. ''' I = self._inodes[inum] return I.E def open2(self, P, name, flags): ''' Open a regular file given parent Dir `P` and `name`, allocate FileHandle, return FileHandle index. Increments the kernel reference count. Wraps self.open. ''' if not P.isdir: OS_ENOTDIR("parent (name=%r) not a directory", P.name) if name in P: if flags & O_EXCL: OS_EEXIST("entry %r already exists", name) E = P[name] elif not flags & O_CREAT: OS_ENOENT("no entry named %r", name) else: E = FileDirent(name) P[name] = E return self.open(E, flags) def open(self, E, flags): ''' Open a regular file `E`, allocate FileHandle, return FileHandle index. Increments the kernel reference count. 
''' for_read = (flags & O_RDONLY) == O_RDONLY or (flags & O_RDWR) == O_RDWR for_write = (flags & O_WRONLY) == O_WRONLY or (flags & O_RDWR) == O_RDWR for_append = (flags & O_APPEND) == O_APPEND for_trunc = (flags & O_TRUNC) == O_TRUNC debug( "for_read=%s, for_write=%s, for_append=%s", for_read, for_write, for_append ) if for_trunc and not for_write: OS_EINVAL("O_TRUNC requires O_WRONLY or O_RDWR") if for_append and not for_write: OS_EINVAL("O_APPEND requires O_WRONLY or O_RDWR") if (for_write and not for_append) and self.append_only: OS_EINVAL("fs is append_only but no O_APPEND") if for_trunc and self.append_only: OS_EINVAL("fs is append_only but O_TRUNC") if (for_write or for_append) and self.readonly: error("fs is readonly") OS_EROFS("fs is readonly") if E.issym: if flags & O_NOFOLLOW: OS_ELOOP("open symlink with O_NOFOLLOW") OS_EINVAL("open(%s)" % (E,)) elif not E.isfile: OS_EINVAL("open of nonfile: %s" % (E,)) FH = FileHandle(self, E, for_read, for_write, for_append, lock=self._lock) if flags & O_TRUNC: FH.truncate(0) return self._new_file_handle_index(FH) @locked def _fh(self, fhndx): try: fh = self._file_handles[fhndx] except IndexError: error("cannot look up FileHandle index %r", fhndx) raise return fh def _fh_remove(self, fhndx): self._file_handles[fhndx] = None def _fh_close(self, fhndx): fh = self._fh(fhndx) fh.close() self._fh_remove(fhndx) @locked def _new_file_handle_index(self, file_handle): ''' Allocate a new FileHandle index for a `file_handle`. TODO: linear allocation cost, may need recode if things get busy; might just need a list of released fds for reuse. ''' fhs = self._file_handles for fhndx, fh in enumerate(fhs): if fh is None: fhs[fhndx] = file_handle return fhndx fhs.append(file_handle) return len(fhs) - 1 @staticmethod def access(E, amode, uid=None, gid=None): ''' Check access mode `amode` against Dirent `E`. ''' with Pfx("access(E=%r,amode=%s,uid=%r,gid=%d)", E, amode, uid, gid): # test the access against the caller's uid/gid # pass same in as default file ownership in case there are no metadata return E.meta.access(amode, uid, gid, default_uid=uid, default_gid=gid) def getxattr(self, inum, xattr_name): ''' Get the extended attribute `xattr_name` from `inum`. ''' E = self.i2E(inum) xattr_name = Meta.xattrify(xattr_name) if xattr_name.startswith(XATTR_VT_PREFIX): # process special attribute names suffix = xattr_name[len(XATTR_VT_PREFIX):] if suffix == 'block': return str(E.block).encode() OS_EINVAL( "getxattr(inum=%s,xattr_name=%r): invalid %r prefixed name", inum, xattr_name, XATTR_VT_PREFIX ) xattr = E.meta.getxattr(xattr_name, None) if xattr is None: ##if xattr_name == 'com.apple.FinderInfo': ## OS_ENOTSUP("inum %d: no xattr %r, pretend not supported", inum, xattr_name) if self.is_darwin: OS_ENOATTR("inum %d: no xattr %r", inum, xattr_name) else: OS_ENODATA("inum %d: no xattr %r", inum, xattr_name) return xattr def removexattr(self, inum, xattr_name): ''' Remove the extended attribute named `xattr_name` from `inum`. ''' if self.readonly: OS_EROFS("fs is read only") E = self.i2E(inum) xattr_name = Meta.xattrify(xattr_name) if xattr_name.startswith(XATTR_VT_PREFIX): OS_EINVAL( "removexattr(inum=%s,xattr_name=%r): invalid %r prefixed name", inum, xattr_name, XATTR_VT_PREFIX ) meta = E.meta try: meta.delxattr(xattr_name) except KeyError: OS_ENOATTR("no such extended attribute: %r", xattr_name) def setxattr(self, inum, xattr_name, xattr_value): ''' Set the extended attribute `xattr_name` to `xattr_value` on inode `inum`. 
    '''
    if self.readonly:
      OS_EROFS("fs is read only")
    E = self.i2E(inum)
    xattr_name = Meta.xattrify(xattr_name)
    if not xattr_name.startswith(XATTR_VT_PREFIX):
      # ordinary attribute, set it and return
      E.meta.setxattr(xattr_name, xattr_value)
      return
    # process special attribute names
    with Pfx("%s.setxattr(%d,%r,%r)", self, inum, xattr_name, xattr_value):
      suffix = xattr_name[len(XATTR_VT_PREFIX):]
      with Pfx(suffix):
        if suffix == 'block':
          # update the Dirent's content directly
          if not E.isfile:
            OS_EINVAL("tried to update the data content of a nonfile: %s", E)
          block_s = Meta.xattrify(xattr_value)
          B, offset = parse(block_s)
          if offset < len(block_s):
            OS_EINVAL("unparsed text after transcription: %r", block_s[offset:])
          if not isBlock(B):
            OS_EINVAL("not a Block transcription")
          info("%s: update .block directly to %r", E, str(B))
          E.block = B
          return
        if suffix == 'control':
          argv = shlex.split(xattr_value.decode('utf-8'))
          if not argv:
            OS_EINVAL("no control command")
          op = argv.pop(0)
          with Pfx(op):
            if op == 'cache':
              if argv:
                OS_EINVAL("extra arguments: %r", argv)
              B = E.block
              if B.indirect:
                X("ADD BLOCK CACHE FOR %s", B)
                bm = self.block_cache.get_blockmap(B)
                X("==> BLOCKMAP: %s", bm)
              else:
                X("IGNORE BLOCK CACHE for %s: not indirect", B)
              return
            OS_EINVAL("unrecognised control command")
        OS_EINVAL("invalid %r prefixed name", XATTR_VT_PREFIX)
class _BasicStoreCommon(Mapping, MultiOpenMixin, HashCodeUtilsMixin,
                        RunStateMixin, ABC):
  ''' Core functions provided by all Stores.

      Subclasses should not subclass this class but BasicStoreSync
      or BasicStoreAsync; these provide the *_bg or non-*_bg sibling
      methods of those described below so that a subclass need only
      implement the synchronous or asynchronous forms. Most local
      Stores will derive from BasicStoreSync and remote Stores
      derive from BasicStoreAsync.

      A subclass should provide thread-safe implementations of the following
      methods:

        .add(chunk) -> hashcode
        .get(hashcode, [default=None]) -> chunk (or default)
        .contains(hashcode) -> boolean
        .flush()

      A subclass _may_ provide thread-safe implementations of the following
      methods:

        .hashcodes(starting_hashcode, length) -> iterable-of-hashcodes

      The background (*_bg) functions return cs.later.LateFunction instances
      for deferred collection of the operation result.

      A convenience .lock attribute is provided for simple mutex use.

      The .readonly attribute may be set to prevent writes and
      trap surprises; it relies on assert statements.

      The .writeonly attribute may be set to trap surprises when
      no blocks are expected to be fetched; it relies on assert
      statements.

      The mapping special methods __getitem__ and __contains__ call
      the implementation methods .get() and .contains().
  '''

  _seq = Seq()

  @fmtdoc
  def __init__(self, name, capacity=None, hashclass=None, runstate=None):
    ''' Initialise the Store.

        Parameters:
        * `name`: a name for this Store;
          if None, a sequential name based on the Store class name
          is generated
        * `capacity`: a capacity for the internal `Later` queue, default 4
        * `hashclass`: the hash class to use for this Store,
          default: `DEFAULT_HASHCLASS` (`{DEFAULT_HASHCLASS.__name__}`)
        * `runstate`: a `cs.resources.RunState` for external control;
          if not supplied one is allocated
    '''
    with Pfx("_BasicStoreCommon.__init__(%s,..)", name):
      if name is None:
        name = "%s%d" % (type(self).__name__, next(self._seq()))
      elif not isinstance(name, str):
        raise TypeError(
            "initial `name` argument must be a str, got %s" % (type(name), ))
      if hashclass is None:
        hashclass = DEFAULT_HASHCLASS
      elif isinstance(hashclass, str):
        hashclass = HASHCLASS_BY_NAME[hashclass]
      assert issubclass(hashclass, HashCode)
      if capacity is None:
        capacity = 4
      if runstate is None:
        runstate = RunState(name)
      RunStateMixin.__init__(self, runstate=runstate)
      self._str_attrs = {}
      self.name = name
      self._capacity = capacity
      self.__funcQ = None
      self.hashclass = hashclass
      self._config = None
      self.logfp = None
      self.mountdir = None
      self.readonly = False
      self.writeonly = False
      self._archives = {}
      self._blockmapdir = None
      self.block_cache = None

  def init(self):
    ''' Method provided to support "vt init".
        For stores requiring some physical setup,
        for example to create an empty DataDir,
        that code goes here.
    '''

  def __str__(self):
    ##return "STORE(%s:%s)" % (type(self), self.name)
    params = []
    for attr, val in sorted(self._str_attrs.items()):
      if isinstance(val, type):
        val_s = '<%s.%s>' % (val.__module__, val.__name__)
      else:
        val_s = str(val)
      params.append(attr + '=' + val_s)
    return "%s:%s(%s)" % (
        self.__class__.__name__, self.hashclass.HASHNAME,
        ','.join([repr(self.name)] + params))

  __repr__ = __str__

  __bool__ = lambda self: True

  # Basic support for putting Stores in sets.
  def __hash__(self):
    return id(self)

  def hash(self, data):
    ''' Return a HashCode instance from data bytes.
        NB: this does _not_ store the data.
    '''
    return self.hashclass.from_chunk(data)

  # Stores are equal only to themselves.
def __eq__(self, other): return self is other ################### ## Mapping methods. ## def __contains__(self, h): ''' Test if the supplied hashcode is present in the store. ''' return self.contains(h) def keys(self): ''' Return an iterator over the Store's hashcodes. ''' return self.hashcodes_from() def __iter__(self): ''' Return an iterator over the Store's hashcodes. ''' return self.keys() def __getitem__(self, h): ''' Return the data bytes associated with the supplied hashcode. Raise `MissingHashcodeError` (a subclass of `KeyError`) if the hashcode is not present. ''' block = self.get(h) if block is None: raise MissingHashcodeError(h) return block def __setitem__(self, h, data): ''' Save `data` against hash key `h`. Actually saves the data against the Store's hash function and raises `ValueError` if that does not match the supplied `h`. ''' h2 = self.add(data, type(h)) if h != h2: raise ValueError("h:%s != hash(data):%s" % (h, h2)) ################################################### ## Context manager methods via ContextManagerMixin. ## def __enter_exit__(self): with defaults(S=self): try: super_eeg = super().__enter_exit__ except AttributeError: def super_eeg(): yield eeg = super_eeg() next(eeg) yield try: next(eeg) except StopIteration: pass ########################## ## MultiOpenMixin methods. ## def startup(self): ''' Start the Store. ''' self.runstate.start() self.__funcQ = Later(self._capacity, name="%s:Later(__funcQ)" % (self.name, )) self.__funcQ.open() def shutdown(self): ''' Called by final MultiOpenMixin.close(). ''' self.runstate.cancel() L = self.__funcQ L.close() L.wait() del self.__funcQ self.runstate.stop() ############################# ## Function dispatch methods. ## def _defer(self, func, *args, **kwargs): ''' Defer a function via the internal `Later` queue. Hold opens on `self` to avoid easy shutdown. ''' self.open() def with_self(): with self: return func(*args, **kwargs) with_self.__name__ = "with_self:" + funcname(func) LF = self.__funcQ.defer(with_self) LF.notify(lambda LF: self.close()) return LF ########################################################################## # Core Store methods, all abstract. @abstractmethod def add(self, data): ''' Add the `data` to the Store, return its hashcode. ''' raise NotImplementedError() @abstractmethod def add_bg(self, data): ''' Dispatch the add request in the background, return a `Result`. ''' raise NotImplementedError() @abstractmethod # pylint: disable=unused-argument def get(self, h, default=None): ''' Fetch the data for hashcode `h` from the Store, or `None`. ''' raise NotImplementedError() @abstractmethod def get_bg(self, h): ''' Dispatch the get request in the background, return a `Result`. ''' raise NotImplementedError() @abstractmethod def contains(self, h): ''' Test whether the hashcode `h` is present in the Store. ''' raise NotImplementedError() @abstractmethod def contains_bg(self, h): ''' Dispatch the contains request in the background, return a `Result`. ''' raise NotImplementedError() @abstractmethod def flush(self): ''' Flush outstanding tasks to the next lowest abstraction. ''' raise NotImplementedError() @abstractmethod def flush_bg(self): ''' Dispatch the flush request in the background, return a `Result`. ''' raise NotImplementedError() ########################################################################## # Archive support. # pylint: disable=unused-argument def get_Archive(self, archive_name, missing_ok=False): ''' Fetch the named Archive or `None`. 
''' warning("no get_Archive for %s", type(self).__name__) return None # pylint: disable=useless-return @prop def config(self): ''' The configuration for use with this Store. Falls back to `defaults.config`. ''' return self._config or defaults.config @config.setter def config(self, new_config): ''' Set the configuration for use with this Store. ''' self._config = new_config ########################################################################## # Blockmaps. @prop def blockmapdir(self): ''' The path to this Store's blockmap directory, if specified. Falls back too the Config.blockmapdir. ''' return self._blockmapdir or self.config.blockmapdir @blockmapdir.setter def blockmapdir(self, dirpath): ''' Set the Blockmap directory path. ''' self._blockmapdir = dirpath @require(lambda capacity: capacity >= 1) def pushto(self, dstS, *, capacity=64, progress=None): ''' Allocate a Queue for Blocks to push from this Store to another Store `dstS`. Return `(Q,T)` where `Q` is the new Queue and `T` is the Thread processing the Queue. Parameters: * `dstS`: the secondary Store to receive Blocks. * `capacity`: the Queue capacity, arbitrary default `64`. * `progress`: an optional `Progress` counting submitted and completed data bytes. Once called, the caller can then .put Blocks onto the Queue. When finished, call Q.close() to indicate end of Blocks and T.join() to wait for the processing completion. ''' sem = Semaphore(capacity) ##sem = Semaphore(1) name = "%s.pushto(%s)" % (self.name, dstS.name) with Pfx(name): Q = IterableQueue(capacity=capacity, name=name) srcS = self srcS.open() dstS.open() T = bg_thread(lambda: ( self.push_blocks(name, Q, srcS, dstS, sem, progress), srcS.close(), dstS.close(), )) return Q, T @staticmethod def push_blocks(name, blocks, srcS, dstS, sem, progress): ''' This is a worker function which pushes Blocks or bytes from the supplied iterable `blocks` to the second Store. Parameters: * `name`: name for this worker instance * `blocks`: an iterable of Blocks or bytes-like objects; each item may also be a tuple of `(block-or-bytes,length)` in which case the supplied length will be used for progress reporting instead of the default length ''' with Pfx("%s: worker", name): lock = Lock() with srcS: pending_blocks = {} # mapping of Result to Block for block in blocks: if type(block) is tuple: try: block1, length = block except TypeError as e: error("cannot unpack %s into Block and length: %s", type(block), e) continue else: block = block1 else: length = None sem.acquire() # worker function to add a block conditionally @logexc def add_block(srcS, dstS, block, length, progress): # add block content if not already present in dstS try: h = block.hashcode except AttributeError: # presume bytes-like or unStored Block type try: h = srcS.hash(block) except TypeError: warning("ignore object of type %s", type(block)) return if h not in dstS: dstS[h] = block else: # get the hashcode, only get the data if required h = block.hashcode if h not in dstS: dstS[h] = block.get_direct_data() if progress: if length is None: length = len(block) progress += length addR = bg_result(add_block, srcS, dstS, block, length, progress) with lock: pending_blocks[addR] = block # cleanup function @logexc def after_add_block(addR): ''' Forget that `addR` is pending. This will be called after `addR` completes. 
''' with lock: pending_blocks.pop(addR) sem.release() addR.notify(after_add_block) with lock: outstanding = list(pending_blocks.keys()) if outstanding: info("PUSHQ: %d outstanding, waiting...", len(outstanding)) for R in outstanding: R.join()
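# Hedged usage sketch, not part of the Store implementation above: the class
# docstring describes a mapping-style interface (__getitem__ / __contains__
# backed by .get() / .contains()) and pushto() describes a Queue/Thread
# hand-off protocol. Assuming `S` and `dstS` are concrete, opened Store
# instances and `block` is a Block, usage would look roughly like this:
h = S.add(b"some data")   # store a chunk, get its hashcode back
assert h in S             # __contains__ -> .contains()
data = S[h]               # __getitem__ -> .get(), MissingHashcodeError if absent
S.flush()

Q, T = S.pushto(dstS, capacity=64)
Q.put(block)              # queue Blocks (or bytes) for copying to dstS
Q.close()                 # signal end of Blocks
T.join()                  # wait for the worker Thread to finish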
class Maker(MultiOpenMixin): ''' Main class representing a set of dependencies to make. ''' def __init__(self, makecmd, parallel=1, name=None): ''' Initialise a Maker. `makecmd`: used to define $(MAKE), typically sys.argv[0]. `parallel`: the degree of parallelism of shell actions. ''' if parallel < 1: raise ValueError( "expected positive integer for parallel, got: %s" % (parallel, )) if name is None: name = cs.pfx.cmd MultiOpenMixin.__init__(self) self.parallel = parallel self.name = name self.debug = MakeDebugFlags() self.debug.debug = False # logging.DEBUG noise self.debug.flags = False # watch debug flag settings self.debug.make = False # watch make decisions self.debug.parse = False # watch Makefile parsing self.fail_fast = True self.no_action = False self.default_target = None self._makefiles = [] self.appendfiles = [] self.macros = {} # autocreating mapping interface to Targets self.targets = TargetMap(self) self.rules = {} self.precious = set() self.active = set() self._active_lock = Lock() self._namespaces = [{'MAKE': makecmd.replace('$', '$$')}] def __str__(self): return ( '%s:%s(parallel=%s,fail_fast=%s,no_action=%s,default_target=%s)' % (type(self).__name__, self.name, self.parallel, self.fail_fast, self.no_action, self.default_target.name)) def startup(self): ''' Set up the `Later` work queue. ''' self._makeQ = Later(self.parallel, self.name) self._makeQ.open() def shutdown(self): ''' Shut down the make queue and wait for it. ''' self._makeQ.close() self._makeQ.wait() def report(self, fp=None): ''' Report the make queue status. ''' D("REPORT...") if fp is None: fp = sys.stderr fp.write(str(self)) fp.write(': ') fp.write(repr(self._makeQ)) fp.write('\n') D("REPORTED") def _ticker(self): while True: time.sleep(5) self.report() @prop def namespaces(self): ''' The namespaces for this Maker: the built namespaces plus the special macros. ''' return self._namespaces + [SPECIAL_MACROS] def insert_namespace(self, ns): ''' Insert a macro namespace in front of the existing namespaces. ''' self._namespaces.insert(0, ns) @prop def makefiles(self): ''' The list of makefiles to consult, a tuple. It is not possible to add more makefiles after accessing this property. ''' _makefiles = self._makefiles if not _makefiles: _makefiles = [] makerc_envvar = ( os.path.splitext(os.path.basename(cs.pfx.cmd))[0] + 'rc').upper() makerc = os.environ.get(makerc_envvar) if makerc and os.path.exists(makerc): _makefiles.append(makerc) makefile = os.path.basename(cs.pfx.cmd).title() + 'file' _makefiles.append(makefile) self._makefiles = _makefiles if type(_makefiles) is not tuple: self._makefiles = _makefiles = tuple(_makefiles) return _makefiles def add_appendfile(self, filename): ''' Add another Mykefile as from the :append directive, to be sourced after the main sequence of Mykefiles. ''' self.appendfiles.append(filename) def debug_make(self, msg, *a, **kw): ''' Issue an INFO log message if the "make" debugging flag is set. ''' if self.debug.make: info(msg, *a, **kw) def debug_parse(self, msg, *a, **kw): ''' Issue an INFO log message if the "parse" debugging flag is set. ''' if self.debug.parse: info(msg, *a, **kw) def target_active(self, target): ''' Add this target to the set of "in progress" targets. ''' self.debug_make("note target \"%s\" as active", target.name) with self._active_lock: self.active.add(target) def target_inactive(self, target): ''' Remove this target from the set of "in progress" targets. 
''' self.debug_make("note target %r as inactive (%s)", target.name, target.state) with self._active_lock: self.active.remove(target) def cancel_all(self): ''' Cancel all "in progress" targets. ''' self.debug_make("cancel_all!") with self._active_lock: Ts = list(self.active) for T in Ts: T.cancel() def defer(self, func, *a, **kw): ''' Submt a function that will run from the queue later. Return the LateFunction. ''' self.debug_make("defer %s(*%r, **%r)" % (func, a, kw)) MLF = self._makeQ.defer(func, *a, **kw) return MLF def after(self, LFs, func, *a, **kw): ''' Submit a function to be run after the supplied LateFunctions `LFs`, return a Result instance for collection. ''' if not isinstance(LFs, list): LFs = list(LFs) self.debug_make("after %s call %s(*%r, **%r)" % (LFs, func, a, kw)) R = Result("Maker.after(%s):%s" % (",".join(str(LF) for LF in LFs), func)) self._makeQ.after(LFs, R, func, *a, **kw) return R def make(self, targets): ''' Synchronous call to make targets in series. ''' ok = True with Pfx("%s.make(%s)", self, " ".join(targets)): for target in targets: if isinstance(target, str): T = self[target] else: T = target T.require() if T.get(): self.debug_make("MAKE %s: OK", T) else: self.debug_make("MAKE %s: FAILED", T) ok = False if self.fail_fast: self.debug_make("ABORT MAKE") break self.debug_make("%r: %s", targets, ok) return ok def __getitem__(self, name): ''' Return the specified Target. ''' return self.targets[name] def setDebug(self, flag, value): ''' Set or clear the named debug option. ''' with Pfx("setDebug(%r, %r)", flag, value): if not flag.isalpha() or not hasattr(self.debug, flag): raise AttributeError( "invalid debug flag, know: %s" % (",".join( sorted([F for F in dir(self.debug) if F.isalpha()])), )) if self.debug.flags: info("debug.%s = %s", flag, value) setattr(self.debug, flag, value) if flag == 'debug': # tweak global logging level also logger = logging.getLogger() log_level = logger.getEffectiveLevel() if value: if log_level > logging.DEBUG: logger.setLevel(logging.DEBUG) else: if log_level < logging.INFO: logger.setLevel(logging.INFO) def getopt(self, args, options=None): ''' Parse command line options. Returns (args, badopts) being remaining command line arguments and the error state (unparsed or invalid options encountered). ''' badopts = False opts, args = getopt.getopt(args, 'dD:eEf:ij:kmNnpqrRsS:tuvx') for opt, value in opts: with Pfx(opt): if opt == '-d': # debug mode self.setDebug('make', True) elif opt == '-D': for flag in [w.strip().lower() for w in value.split(',')]: if len(flag) == 0: # silently skip empty flag items continue if flag.startswith('-'): value = False flag = flag[1:] else: value = True try: self.setDebug(flag, value) except AttributeError as e: error("bad flag %r: %s", flag, e) badopts = True elif opt == '-f': self._makefiles.append(value) elif opt == '-j': try: value = int(value) except ValueError as e: error("invalid -j value: %s", e) badopts = True else: if value < 1: error("invalid -j value: %d, must be >= 1", value) badopts = True else: self.parallel = int(value) elif opt == '-k': self.fail_fast = False elif opt == '-n': self.no_action = True else: error("unimplemented") badopts = True return args, badopts def loadMakefiles(self, makefiles, parent_context=None): ''' Load the specified Makefiles; return success. Each top level Makefile named gets its own namespace prepended to the namespaces list. In this way later top level Makefiles' definitions override ealier ones while still detecting conflicts within a particular Makefile. 
Also, the default_target property is set to the first encountered target if not yet set. ''' ok = True for makefile in makefiles: self.debug_parse("load makefile: %s", makefile) first_target = None ns = {} self.insert_namespace(ns) for parsed_object in self.parse(makefile, parent_context): with Pfx(parsed_object.context): if isinstance(parsed_object, Exception): error("exception: %s", parsed_object) ok = False elif isinstance(parsed_object, Target): # record this Target in the Maker T = parsed_object self.debug_parse("add target %s", T) if '%' in T.name: # record this Target as a rule self.rules[T.name] = T else: self.targets[T.name] = T if first_target is None: first_target = T elif isinstance(parsed_object, Macro): self.debug_parse("add macro %s", parsed_object) ns[parsed_object.name] = parsed_object else: raise ValueError( f"unsupported parse item received: {type(parsed_object)}{parsed_object!r}" ) if first_target is not None: self.default_target = first_target return ok def parse(self, fp, parent_context=None, missing_ok=False): ''' Read a Mykefile and yield Macros and Targets. ''' from .make import Target, Action action_list = None # not in a target for context, line in readMakefileLines(self, fp, parent_context=parent_context, missing_ok=missing_ok): with Pfx(str(context)): if isinstance(line, OSError): e = line if e.errno == errno.ENOENT or e.errno == errno.EPERM: if missing_ok: continue e.context = context yield e break raise e try: if line.startswith(':'): # top level directive _, doffset = get_white(line, 1) word, offset = get_identifier(line, doffset) if not word: raise ParseError(context, doffset, "missing directive name") _, offset = get_white(line, offset) with Pfx(word): if word == 'append': if offset == len(line): raise ParseError(context, offset, "nothing to append") mexpr, offset = MacroExpression.parse( context, line, offset) assert offset == len(line) for include_file in mexpr( context, self.namespaces).split(): if include_file: if not os.path.isabs(include_file): include_file = os.path.join( realpath(dirname(fp.name)), include_file) self.add_appendfile(include_file) continue if word == 'import': if offset == len(line): raise ParseError(context, offset, "nothing to import") ok = True missing_envvars = [] for envvar in line[offset:].split(): if envvar: envvalue = os.environ.get(envvar) if envvalue is None: error("no $%s" % (envvar, )) ok = False missing_envvars.append(envvar) else: yield Macro( context, envvar, (), envvalue.replace('$', '$$')) if not ok: raise ValueError( "missing environment variables: %s" % (missing_envvars, )) continue if word == 'precious': if offset == len(line): raise ParseError( context, offset, "nothing to mark as precious") mexpr, offset = MacroExpression.parse( context, line, offset) self.precious.update(word for word in mexpr( context, self.namespaces).split() if word) continue raise ParseError(context, doffset, "unrecognised directive") if action_list is not None: # currently collating a Target if not line[0].isspace(): # new target or unindented assignment etc - fall through # action_list is already attached to targets, # so simply reset it to None to keep state action_list = None else: # action line _, offset = get_white(line) if offset >= len(line) or line[offset] != ':': # ordinary shell action action_silent = False if offset < len(line) and line[offset] == '@': action_silent = True offset += 1 A = Action(context, 'shell', line[offset:], silent=action_silent) self.debug_parse("add action: %s", A) action_list.append(A) continue # in-target 
directive like ":make" _, offset = get_white(line, offset + 1) directive, offset = get_identifier(line, offset) if not directive: raise ParseError( context, offset, "missing in-target directive after leading colon" ) A = Action(context, directive, line[offset:].lstrip()) self.debug_parse("add action: %s", A) action_list.append(A) continue try: macro = Macro.from_assignment(context, line) except ValueError: pass else: yield macro continue # presumably a target definition # gather up the target as a macro expression target_mexpr, offset = MacroExpression.parse(context, stopchars=':') if not context.text.startswith(':', offset): raise ParseError(context, offset, "no colon in target definition") prereqs_mexpr, offset = MacroExpression.parse( context, offset=offset + 1, stopchars=':') if offset < len( context.text) and context.text[offset] == ':': postprereqs_mexpr, offset = MacroExpression.parse( context, offset=offset + 1) else: postprereqs_mexpr = [] action_list = [] for target in target_mexpr(context, self.namespaces).split(): yield Target(self, target, context, prereqs=prereqs_mexpr, postprereqs=postprereqs_mexpr, actions=action_list) continue raise ParseError(context, 0, 'unparsed line') except ParseError as e: exception("%s", e) self.debug_parse("finish parse")
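# Hedged sketch of the defer()/after() pattern described in the Maker methods
# above: defer() returns a LateFunction queued on the make queue, and after()
# returns a Result which fires once the listed LateFunctions have completed.
# `M` (a Maker), compile_one and link_all are hypothetical names for
# illustration only.
LF1 = M.defer(compile_one, "foo.o")
LF2 = M.defer(compile_one, "bar.o")
R = M.after([LF1, LF2], link_all, "prog", ["foo.o", "bar.o"])
ok = R()   # block until the link step has run, collect its return value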
def __init__(self, recv, send, request_handler=None, name=None, packet_grace=None, tick=None): ''' Initialise the PacketConnection. Parameters: * `recv`: inbound binary stream. If this is an `int` it is taken to be an OS file descriptor, otherwise it should be a `cs.buffer.CornuCopyBuffer` or a file like object with a `read1` or `read` method. * `send`: outbound binary stream. If this is an `int` it is taken to be an OS file descriptor, otherwise it should be a file like object with `.write(bytes)` and `.flush()` methods. For a file descriptor sending is done via an os.dup() of the supplied descriptor, so the caller remains responsible for closing the original descriptor. * `packet_grace`: default pause in the packet sending worker to allow another packet to be queued before flushing the output stream. Default: `DEFAULT_PACKET_GRACE`s. A value of `0` will flush immediately if the queue is empty. * `request_handler`: an optional callable accepting (`rq_type`, `flags`, `payload`). The request_handler may return one of 5 values on success: * `None`: response will be 0 flags and an empty payload. * `int`: flags only. Response will be the flags and an empty payload. * `bytes`: payload only. Response will be 0 flags and the payload. * `str`: payload only. Response will be 0 flags and the str encoded as bytes using UTF-8. * `(int, bytes)`: Specify flags and payload for response. An unsuccessful request should raise an exception, which will cause a failure response packet. * `tick`: optional tick parameter, default `None`. If `None`, do nothing. If a Boolean, call `tick_fd_2` if true, otherwise do nothing. Otherwise `tick` should be a callable accepting a byteslike value. ''' if name is None: name = str(seq()) self.name = name if isinstance(recv, int): self._recv = CornuCopyBuffer.from_fd(recv) elif isinstance(recv, CornuCopyBuffer): self._recv = recv else: self._recv = CornuCopyBuffer.from_file(recv) if isinstance(send, int): self._send = os.fdopen(os.dup(send), 'wb') else: self._send = send if packet_grace is None: packet_grace = DEFAULT_PACKET_GRACE if tick is None: tick = lambda bs: None elif isinstance(tick, bool): if tick: tick = tick_fd_2 else: tick = lambda bs: None self.packet_grace = packet_grace self.request_handler = request_handler self.tick = tick # tags of requests in play against the local system self._channel_request_tags = {0: set()} self.notify_recv_eof = set() self.notify_send_eof = set() # LateFunctions for the requests we are performing for the remote system self._running = set() # requests we have outstanding against the remote system self._pending = {0: {}} # sequence of tag numbers # TODO: later, reuse old tags to prevent monotonic growth of tag field self._tag_seq = Seq(1) # work queue for local requests self._later = Later(4, name="%s:Later" % (self, )) self._later.open() # dispatch queue of Packets to send self._sendQ = IterableQueue(16) self._lock = Lock() self.closed = False # debugging: check for reuse of (channel,tag) etc self.__sent = set() self.__send_queued = set() # dispatch Thread to process received packets self._recv_thread = bg_thread(self._receive_loop, name="%s[_receive_loop]" % (self.name, )) # dispatch Thread to send data # primary purpose is to bundle output by deferring flushes self._send_thread = bg_thread(self._send_loop, name="%s[_send]" % (self.name, ))
class PacketConnection(object): ''' A bidirectional binary connection for exchanging requests and responses. ''' # special packet indicating end of stream EOF_Packet = Packet(is_request=True, channel=0, tag=0, flags=0, rq_type=0, payload=b'') # pylint: disable=too-many-arguments def __init__(self, recv, send, request_handler=None, name=None, packet_grace=None, tick=None): ''' Initialise the PacketConnection. Parameters: * `recv`: inbound binary stream. If this is an `int` it is taken to be an OS file descriptor, otherwise it should be a `cs.buffer.CornuCopyBuffer` or a file like object with a `read1` or `read` method. * `send`: outbound binary stream. If this is an `int` it is taken to be an OS file descriptor, otherwise it should be a file like object with `.write(bytes)` and `.flush()` methods. For a file descriptor sending is done via an os.dup() of the supplied descriptor, so the caller remains responsible for closing the original descriptor. * `packet_grace`: default pause in the packet sending worker to allow another packet to be queued before flushing the output stream. Default: `DEFAULT_PACKET_GRACE`s. A value of `0` will flush immediately if the queue is empty. * `request_handler`: an optional callable accepting (`rq_type`, `flags`, `payload`). The request_handler may return one of 5 values on success: * `None`: response will be 0 flags and an empty payload. * `int`: flags only. Response will be the flags and an empty payload. * `bytes`: payload only. Response will be 0 flags and the payload. * `str`: payload only. Response will be 0 flags and the str encoded as bytes using UTF-8. * `(int, bytes)`: Specify flags and payload for response. An unsuccessful request should raise an exception, which will cause a failure response packet. * `tick`: optional tick parameter, default `None`. If `None`, do nothing. If a Boolean, call `tick_fd_2` if true, otherwise do nothing. Otherwise `tick` should be a callable accepting a byteslike value. 
''' if name is None: name = str(seq()) self.name = name if isinstance(recv, int): self._recv = CornuCopyBuffer.from_fd(recv) elif isinstance(recv, CornuCopyBuffer): self._recv = recv else: self._recv = CornuCopyBuffer.from_file(recv) if isinstance(send, int): self._send = os.fdopen(os.dup(send), 'wb') else: self._send = send if packet_grace is None: packet_grace = DEFAULT_PACKET_GRACE if tick is None: tick = lambda bs: None elif isinstance(tick, bool): if tick: tick = tick_fd_2 else: tick = lambda bs: None self.packet_grace = packet_grace self.request_handler = request_handler self.tick = tick # tags of requests in play against the local system self._channel_request_tags = {0: set()} self.notify_recv_eof = set() self.notify_send_eof = set() # LateFunctions for the requests we are performing for the remote system self._running = set() # requests we have outstanding against the remote system self._pending = {0: {}} # sequence of tag numbers # TODO: later, reuse old tags to prevent monotonic growth of tag field self._tag_seq = Seq(1) # work queue for local requests self._later = Later(4, name="%s:Later" % (self, )) self._later.open() # dispatch queue of Packets to send self._sendQ = IterableQueue(16) self._lock = Lock() self.closed = False # debugging: check for reuse of (channel,tag) etc self.__sent = set() self.__send_queued = set() # dispatch Thread to process received packets self._recv_thread = bg_thread(self._receive_loop, name="%s[_receive_loop]" % (self.name, )) # dispatch Thread to send data # primary purpose is to bundle output by deferring flushes self._send_thread = bg_thread(self._send_loop, name="%s[_send]" % (self.name, )) def __str__(self): return "PacketConnection[%s]" % (self.name, ) @pfx_method def shutdown(self, block=False): ''' Shut down the PacketConnection, optionally blocking for outstanding requests. Parameters: `block`: block for outstanding requests, default False. ''' with self._lock: if self.closed: # shutdown already called from another thread return # prevent further request submission either local or remote self.closed = True ps = self._pending_states() if ps: warning("PENDING STATES AT SHUTDOWN: %r", ps) # wait for completion of requests we're performing for LF in list(self._running): LF.join() # shut down sender, should trigger shutdown of remote receiver self._sendQ.close(enforce_final_close=True) self._send_thread.join() # we do not wait for the receiver - anyone hanging on outstaning # requests will get them as they come in, and in theory a network # disconnect might leave the receiver hanging anyway self._later.close() if block: self._later.wait() def join(self): ''' Wait for the receive side of the connection to terminate. ''' self._recv_thread.join() def _new_tag(self): return next(self._tag_seq) def _pending_states(self): ''' Return a list of ( (channel, tag), Request_State ) for the currently pending requests. ''' states = [] pending = self._pending for channel, channel_states in sorted(pending.items()): for tag, channel_state in sorted(channel_states.items()): states.append(((channel, tag), channel_state)) return states @locked def _pending_add(self, channel, tag, state): ''' Record some state against a (channel, tag). 
''' pending = self._pending if channel not in pending: raise ValueError("unknown channel %d" % (channel, )) channel_info = pending[channel] if tag in channel_info: raise ValueError("tag %d already pending in channel %d" % (tag, channel)) self._pending[channel][tag] = state @locked def _pending_pop(self, channel, tag): ''' Retrieve and remove the state associated with (channel, tag). ''' pending = self._pending if channel not in pending: raise ValueError("unknown channel %d" % (channel, )) channel_info = pending[channel] if tag not in channel_info: raise ValueError("tag %d unknown in channel %d" % (tag, channel)) if False and tag == 15: raise RuntimeError("BANG") return channel_info.pop(tag) def _pending_cancel(self): ''' Cancel all the pending requests. ''' for chtag, _ in self._pending_states(): channel, tag = chtag warning("%s: cancel pending request %d:%s", self, channel, tag) _, result = self._pending_pop(channel, tag) result.cancel() def _queue_packet(self, P): sig = (P.channel, P.tag, P.is_request) if sig in self.__send_queued: raise RuntimeError("requeue of %s: %s" % (sig, P)) self.__send_queued.add(sig) try: self._sendQ.put(P) except ClosedError as e: warning("%s: packet not sent: %s (P=%s)", self._sendQ, e, P) def _reject(self, channel, tag, payload=bytes(())): ''' Issue a rejection of the specified request. ''' error("rejecting request: " + str(payload)) if isinstance(payload, str): payload = payload.encode('utf-8') self._queue_packet( Packet(is_request=False, channel=channel, tag=tag, flags=0, payload=payload)) def _respond(self, channel, tag, flags, payload): ''' Issue a valid response. Tack a 1 (ok) flag onto the flags and dispatch. ''' assert isinstance(channel, int) assert isinstance(tag, int) assert isinstance(flags, int) assert isinstance(payload, bytes) flags = (flags << 1) | 1 self._queue_packet( Packet(is_request=False, channel=channel, tag=tag, flags=flags, payload=payload)) @not_closed # pylint: disable=too-many-arguments def request(self, rq_type, flags=0, payload=b'', decode_response=None, channel=0): ''' Compose and dispatch a new request, returns a `Result`. Allocates a new tag, a Result to deliver the response, and records the response decode function for use when the response arrives. Parameters: * `rq_type`: request type code, an int * `flags`: optional flags to accompany the request, an int; default `0`. * `payload`: optional bytes-like object to accompany the request; default `b''` * `decode_response`: optional callable accepting (response_flags, response_payload_bytes) and returning the decoded response payload value; if unspecified, the response payload bytes are used The Result will yield an `(ok, flags, payload)` tuple, where: * `ok`: whether the request was successful * `flags`: the response flags * `payload`: the response payload, decoded by decode_response if specified ''' if rq_type < 0: raise ValueError("rq_type may not be negative (%s)" % (rq_type, )) # reserve type 0 for end-of-requests rq_type += 1 tag = self._new_tag() R = Result() self._pending_add(channel, tag, Request_State(decode_response, R)) self._queue_packet( Packet(is_request=True, channel=channel, tag=tag, flags=flags, rq_type=rq_type, payload=payload)) return R @not_closed def do(self, *a, **kw): ''' Synchronous request. Submits the request, then calls the `Result` returned from the request. 
''' return self.request(*a, **kw)() @logexc # pylint: disable=too-many-arguments def _run_request(self, channel, tag, handler, rq_type, flags, payload): ''' Run a request and queue a response packet. ''' with Pfx( "_run_request[channel=%d,tag=%d,rq_type=%d,flags=0x%02x,payload=%s", channel, tag, rq_type, flags, repr(payload) if len(payload) <= 32 else repr(payload[:32]) + '...'): result_flags = 0 result_payload = b'' try: result = handler(rq_type, flags, payload) if result is not None: if isinstance(result, int): result_flags = result elif isinstance(result, bytes): result_payload = result elif isinstance(result, str): result_payload = result.encode( encoding='utf-8', errors='xmlcharrefreplace') else: result_flags, result_payload = result except Exception as e: # pylint: disable=broad-except exception("exception: %s", e) self._reject(channel, tag, "exception during handler") else: self._respond(channel, tag, result_flags, result_payload) self._channel_request_tags[channel].remove(tag) # pylint: disable=too-many-branches,too-many-statements,too-many-locals def _receive_loop(self): ''' Receive packets from upstream, decode into requests and responses. ''' XX = self.tick with PrePfx("_RECEIVE [%s]", self): with post_condition(("_recv is None", lambda: self._recv is None)): while True: try: XX(b'<') packet = Packet.parse(self._recv) except EOFError: break if packet == self.EOF_Packet: break channel = packet.channel tag = packet.tag flags = packet.flags payload = packet.payload if packet.is_request: # request from upstream client with Pfx("request[%d:%d]", channel, tag): if self.closed: debug("rejecting request: closed") # NB: no rejection packet sent since sender also closed elif self.request_handler is None: self._reject(channel, tag, "no request handler") else: requests = self._channel_request_tags if channel not in requests: # unknown channel self._reject(channel, tag, "unknown channel %d") elif tag in self._channel_request_tags[ channel]: self._reject( channel, tag, "channel %d: tag already in use: %d" % (channel, tag)) else: # payload for requests is the request enum and data rq_type = packet.rq_type if rq_type == 0: # magic EOF rq_type - must be malformed (!=EOF_Packet) error( "malformed EOF packet received: %s", packet) break # normalise rq_type rq_type -= 1 requests[channel].add(tag) # queue the work function and track it LF = self._later.defer( self._run_request, channel, tag, self.request_handler, rq_type, flags, payload) self._running.add(LF) LF.notify(self._running.remove) else: with Pfx("response[%d:%d]", channel, tag): # response: get state of matching pending request, remove state try: rq_state = self._pending_pop(channel, tag) except ValueError as e: # no such pending pair - response to unknown request error("%d.%d: response to unknown request: %s", channel, tag, e) else: decode_response, R = rq_state # first flag is "ok" ok = (flags & 0x01) != 0 flags >>= 1 payload = packet.payload if ok: # successful reply # return (True, flags, decoded-response) if decode_response is None: # return payload bytes unchanged R.result = (True, flags, payload) else: # decode payload try: result = decode_response( flags, payload) except Exception: # pylint: disable=broad-except R.exc_info = sys.exc_info() else: R.result = (True, flags, result) else: # unsuccessful: return (False, other-flags, payload-bytes) R.result = (False, flags, payload) # end of received packets: cancel any outstanding requests self._pending_cancel() # alert any listeners of receive EOF for notify in self.notify_recv_eof: 
notify(self) self._recv = None self.shutdown() # pylint: disable=too-many-branches def _send_loop(self): ''' Send packets upstream. Write every packet directly to self._send. Flush whenever the queue is empty. ''' XX = self.tick ##with Pfx("%s._send", self): with PrePfx("_SEND [%s]", self): with post_condition(("_send is None", lambda: self._send is None)): fp = self._send Q = self._sendQ grace = self.packet_grace for P in Q: sig = (P.channel, P.tag, P.is_request) if sig in self.__sent: raise RuntimeError("second send of %s" % (P, )) self.__sent.add(sig) try: XX(b'>') for bs in P.transcribe_flat(): fp.write(bs) if Q.empty(): # no immediately ready further packets: flush the output buffer if grace > 0: # allow a little time for further Packets to queue XX(b'Sg') sleep(grace) if Q.empty(): # still nothing XX(b'F') fp.flush() else: XX(b'F') fp.flush() except OSError as e: if e.errno == errno.EPIPE: warning("remote end closed") break raise try: XX(b'>EOF') for bs in self.EOF_Packet.transcribe_flat(): fp.write(bs) fp.close() except (OSError, IOError) as e: if e.errno == errno.EPIPE: debug("remote end closed: %s", e) elif e.errno == errno.EBADF: warning("local end closed: %s", e) else: raise except Exception as e: error("(_SEND) UNEXPECTED EXCEPTION: %s %s", e, e.__class__) raise self._send = None
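# Hedged end-to-end sketch based on the PacketConnection docstrings above: a
# request_handler may return bytes (payload only), request() returns a Result
# yielding (ok, flags, payload), and do() is the synchronous form. The
# descriptors recv_fd/send_fd are assumptions standing in for a real
# connected stream pair.
def echo_handler(rq_type, flags, payload):
  # payload-only response: sent back with zero flags
  return payload

conn = PacketConnection(
    recv_fd, send_fd, request_handler=echo_handler, name="demo")
R = conn.request(0, flags=0, payload=b"hello")     # asynchronous, returns a Result
ok, flags, payload = R()                           # block for the response
ok, flags, payload = conn.do(0, payload=b"hello")  # synchronous equivalent
conn.shutdown(block=True)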
class TestPipeline(unittest.TestCase):
  ''' Unit tests for pipelines.
  '''

  @staticmethod
  def _f(x):
    return x * 2

  @staticmethod
  def _delay(n):
    sleep(n)
    return n

  class _Bang(Exception):
    pass

  @staticmethod
  def _bang():
    raise TestPipeline._Bang()

  def setUp(self):
    ''' Set up a Later, log to the terminal.
    '''
    self.L = Later(2)
    self.L.open()
    self.L.logTo("/dev/tty")

  def tearDown(self):
    ''' Close the Later.
    '''
    self.L.close()

  def test09pipeline_00noop(self):
    ''' Run a single stage one to one no-op pipeline.
    '''
    with Later(1) as L:
      items = ['a', 'b', 'c', 'g', 'f', 'e']
      P = pipeline(L, [(FUNC_ONE_TO_ONE, lambda x: x)], items)
      result = list(P.outQ)
      self.assertEqual(items, result)

  def test09pipeline_01identity(self):
    ''' Run a single stage one to many no-op pipeline.
    '''
    L = self.L
    items = ['a', 'b', 'c', 'g', 'f', 'e']

    def func(x):
      yield x

    P = pipeline(L, [(FUNC_ONE_TO_MANY, func)], items)
    self.assertIsNot(P.outQ, items)
    result = list(P.outQ)
    self.assertEqual(items, result)

  def test09pipeline_02double(self):
    ''' Run a single stage one to many pipeline.
    '''
    L = self.L
    items = ['a', 'b', 'c', 'g', 'f', 'e']
    expected = ['a', 'a', 'b', 'b', 'c', 'c', 'g', 'g', 'f', 'f', 'e', 'e']

    def func(x):
      yield x
      yield x

    P = pipeline(L, [(FUNC_ONE_TO_MANY, func)], items)
    self.assertIsNot(P.outQ, items)
    result = list(P.outQ)
    # values may be interleaved due to parallelism
    self.assertEqual(len(result), len(expected))
    self.assertEqual(sorted(result), sorted(expected))

  def test09pipeline_03a_sort(self):
    ''' Run a single stage many to many pipeline doing a sort.
    '''
    L = self.L
    items = ['a', 'b', 'c', 'g', 'f', 'e']
    expected = ['a', 'b', 'c', 'e', 'f', 'g']

    def func(x):
      return sorted(x)

    P = pipeline(L, [(FUNC_MANY_TO_MANY, func)], items)
    self.assertIsNot(P.outQ, items)
    result = list(P.outQ)
    self.assertEqual(result, expected)

  def test09pipeline_03b_set(self):
    ''' Run a single stage many to many pipeline.
    '''
    L = self.L
    items = ['a', 'b', 'c', 'g', 'f', 'e']
    expected = ['a', 'b', 'c', 'e', 'f', 'g']

    def func(x):
      return set(x)

    P = pipeline(L, [(FUNC_MANY_TO_MANY, func)], items)
    self.assertIsNot(P.outQ, items)
    result = set(P.outQ)
    self.assertEqual(result, set(items))

  def test09pipeline_04select(self):
    ''' Run a single stage selection pipeline.
    '''
    L = self.L
    items = ['a', 'b', 'c', 'g', 'f', 'e']
    want = ('a', 'f', 'c')
    expected = ['a', 'c', 'f']

    def wanted(x):
      return x in want

    P = pipeline(L, [(FUNC_SELECTOR, wanted)], items)
    self.assertIsNot(P.outQ, items)
    result = list(P.outQ)
    self.assertEqual(result, expected)

  def test09pipeline_05two_by_two_by_sort(self):
    ''' Run a 3 stage pipeline with some fan out.
    '''
    L = self.L
    items = ['a', 'b', 'c', 'g', 'f', 'e']
    expected = [
        'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c',
        'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g',
    ]

    def double(x):
      yield x
      yield x

    P = pipeline(
        L, [(FUNC_ONE_TO_MANY, double), (FUNC_ONE_TO_MANY, double),
            (FUNC_MANY_TO_MANY, sorted)], items)
    self.assertIsNot(P.outQ, items)
    result = list(P.outQ)
    self.assertEqual(result, expected)