  def test16remove(self):
    R1 = Range(self.items1)
    R1._check()
    R1.remove(3)
    R1._check()
    # removing an absent element must raise KeyError
    self.assertRaises(KeyError, R1.remove, 3)
    R1._check()
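These tests rely on fixtures such as `self.items1`, `self.items2` and the derived `self.spans1`, `self.items1plus2` and so on, prepared in the test case's `setUp`; the real values are not part of this excerpt. As a purely illustrative sketch (the numbers below are invented, not the suite's fixtures), a `Range` collapses contiguous integers into half-open spans:

from cs.range import Range

R = Range([1, 2, 3, 7, 8, 11])
# contiguous runs collapse into half-open [start, end) spans
print(list(R.spans()))  # e.g. spans covering [1,4), [7,9) and [11,12)
print(sorted(R))        # [1, 2, 3, 7, 8, 11]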
  def test12copy(self):
    R1 = Range(self.items1)
    R2 = R1.copy()
    R2._check()
    # the copy must be a distinct but equal object
    self.assertTrue(R1 is not R2, "R1 is R2")
    self.assertEqual(R1, R2)
    self.assertEqual(R1._spans, R2._spans)
    self.assertEqual(list(R1.spans()), list(R2.spans()))

  def test13update00fromItems(self):
    R1 = Range(self.items1)
    R1._check()
    R1.update(self.items2)
    R1._check()
    self.assertEqual(list(R1), self.items1plus2)
    self.assertEqual(list(R1.spans()), self.spans1plus2)

  def test13update01fromSpans(self):
    R1 = Range(self.items1)
    R1._check()
    for span in self.spans2:
      R1.add_span(span[0], span[1])
      R1._check()
    self.assertEqual(list(R1), self.items1plus2)
    self.assertEqual(list(R1.spans()), self.spans1plus2)

  def test14union(self):
    R1 = Range(self.items1)
    R1._check()
    R2 = Range(self.items2)
    R2._check()
    R3 = R1.union(R2)
    R3._check()
    self.assertEqual(list(R3), self.items1plus2)
    self.assertEqual(list(R3.spans()), self.spans1plus2)
  def test17symmetric_difference(self):
    R1 = Range(self.items1)
    R1._check()
    R2 = Range(self.items2)
    R2._check()
    R3 = R1.symmetric_difference(R2)
    R3._check()
    self.assertEqual(list(R3), self.items1xor2)
    self.assertEqual(list(R3.spans()), self.spans1xor2)
    # the ^ operator should behave like symmetric_difference
    R4 = R1 ^ R2
    R4._check()
    self.assertEqual(list(R4), self.items1xor2)
    self.assertEqual(list(R4.spans()), self.spans1xor2)
    self.assertEqual(R4, R3)
    self.assertTrue(R4 is not R3, "R4 is R3")

  def test30random_set_equivalence(self):
    # a Range should track a plain set of ints under random adds/discards
    R1 = Range()
    S = set()
    self.assertEqual(S, set(R1))
    for _ in range(100):
      n = randint(0, 99)
      if randint(0, 1):
        R1.add(n)
        S.add(n)
      else:
        R1.discard(n)
        S.discard(n)
      self.assertEqual(S, set(R1))
class SharedAppendFile(object):
  ''' A base class to share a modifiable file between multiple users.

      The use case was driven from the shared CSV files used by
      `cs.nodedb.csvdb.Backend_CSVFile`, where multiple users can
      read from a common CSV file, and coordinate updates with a
      lock file.

      This presents the following interfaces:
      * `__iter__`: yields data chunks from the underlying file up
        to EOF; it blocks no more than reading from the file does.
        Note that multiple iterators share the same read pointer.
      * `open`: a context manager returning a writable file for
        writing updates to the file; it blocks reads from this
        instance (though not, of course, by other users of the file)
        and arranges that users of `__iter__` do not receive their
        own written data, thus arranging that `__iter__` returns
        only foreign file updates.

      Subclasses would normally override `__iter__` to parse the
      received data into their natural records.
  '''

  def __init__(
      self,
      pathname,
      read_only=False,
      write_only=False,
      binary=False,
      newline=None,
      lock_ext=None,
      lock_timeout=None,
      poll_interval=None,
  ):
    ''' Initialise this SharedAppendFile.

        Parameters:
        * `pathname`: the pathname of the file to open.
        * `read_only`: set to true if we will not write updates.
        * `write_only`: set to true if we will not read updates.
        * `binary`: if the file is to be opened in binary mode,
          otherwise text mode.
        * `newline`: passed to `open()`
        * `lock_ext`: lock file extension.
        * `lock_timeout`: maximum time to wait for obtaining the lock file.
        * `poll_interval`: poll time when taking a lock file,
          default `DEFAULT_POLL_INTERVAL`
    '''
    with Pfx("SharedAppendFile(%r): __init__", pathname):
      if poll_interval is None:
        poll_interval = DEFAULT_POLL_INTERVAL
      self.pathname = abspath(pathname)
      self.binary = binary
      self.newline = newline
      self.read_only = read_only
      self.write_only = write_only
      self.lock_ext = lock_ext
      self.lock_timeout = lock_timeout
      self.poll_interval = poll_interval
      if self.read_only:
        if self.write_only:
          raise ValueError("only one of read_only and write_only may be true")
        o_flags = O_RDONLY
      elif self.write_only:
        o_flags = O_WRONLY | O_APPEND
      else:
        o_flags = O_RDWR | O_APPEND
      self._fd = os.open(self.pathname, o_flags)
      self._rfp = None
      self._read_offset = 0
      self._read_skip = Range()
      self._readlock = RLock()
      if not self.write_only:
        self._readopen()
      self.closed = False

  def __str__(self):
    return "SharedAppendFile(%r)" % (self.pathname,)

  def close(self):
    ''' Close the SharedAppendFile: flag it as closed, letting
        `tail` loops terminate.
    '''
    if self.closed:
      warning("multiple close of %s", self)
    self.closed = True

  def _readopen(self):
    ''' Open the file for read.
    '''
    assert not self.write_only
    mode = 'rb' if self.binary else 'r'
    fd = dup(self._fd)
    # binary reads are unbuffered; text reads use the default buffering
    buffering = 0 if self.binary else -1
    self._rfp = fdopen(fd, mode, buffering=buffering, newline=self.newline)
    self.open_state = self.filestate

  def _readclose(self):
    ''' Close the reader.
    '''
    assert not self.write_only
    rfp = self._rfp
    self._rfp = None
    rfp.close()
    self.closed = True

  def __iter__(self):
    ''' Iterate over the file, yielding data chunks until EOF.

        This skips data written to the file by this instance so
        that the data chunks returned are always foreign updates.
        Note that all iterators share the same file offset pointer.

        Usage:

            for chunk in f:
                ... process chunk ...
    '''
    assert not self.write_only
    while True:
      with self._readlock:
        # advance over any skip areas (our own earlier writes)
        offset = self._read_offset
        skip = self._read_skip
        while skip and skip.start <= offset:
          start0, end0 = skip.span0
          if offset < end0:
            offset = end0
          skip.discard(start0, end0)
        read_size = DEFAULT_READSIZE
        if skip:
          # do not read into the next skip area
          read_size = min(read_size, skip.span0.start - offset)
        assert read_size > 0
        # gather data
        self._rfp.seek(offset)
        bs = self._rfp.read(read_size)
        self._read_offset = self._rfp.tell()
      if not bs:
        break
      yield bs

  def _lockfile(self):
    ''' Obtain an exclusive write lock on the shared file.
        This arranges that multiple instances can coordinate writes.

        Usage:

            with self._lockfile():
                ... write data ...
    '''
    return lockfile(
        self.pathname,
        ext=self.lock_ext,
        poll_interval=self.poll_interval,
        timeout=self.lock_timeout,
    )

  @contextmanager
  def open(self):
    ''' Open the file for append write, returning a writable file.
        Iterators are blocked for the duration of the context manager.
    '''
    if self.read_only:
      raise RuntimeError("attempt to write to read only SharedAppendFile")
    with self._lockfile():
      with self._readlock:
        mode = 'ab' if self.binary else 'a'
        fd = dup(self._fd)
        with fdopen(fd, mode, newline=self.newline) as wfp:
          wfp.seek(0, SEEK_END)
          start = wfp.tell()
          yield wfp
          end = wfp.tell()
        if end > start:
          # remember our own writes so that __iter__ skips them
          self._read_skip.add(start, end)
        if not self.write_only:
          self._readopen()

  def tail(self):
    ''' A generator returning data chunks from the file indefinitely.

        This supports writing monitors for file updates.
        Note that this, like other iterators, shares the same file
        offset pointer. Also note that it calls the class' iterator,
        so that if a subclass returns higher level records from its
        iterator, those records will also be returned from `tail`.

        Usage:

            for chunk in f.tail():
                ... process chunk ...
    '''
    while True:
      for item in self:
        yield item
      if self.closed:
        return
      time.sleep(DEFAULT_TAIL_PAUSE)

  @property
  def filestate(self):
    ''' The current FileState of the backing file.
    '''
    fd = self._fd
    if fd is None:
      return None
    return FileState(fd)

  # TODO: need to notice filestate changes in other areas
  # TODO: support in place rewrite?
  @contextmanager
  def rewrite(self):
    ''' Context manager for rewriting the file.

        This writes data to a new file which is then renamed onto
        the original. After the switch, the read pointer is set to
        the end of the new file.

        Usage:

            with f.rewrite() as wfp:
                ... write data to wfp ...
    '''
    with self._readlock:
      with self.open() as _:
        # mkstemp returns a file descriptor and pathname, not a file object
        tmpfd, tmppath = mkstemp(
            dir=dirname(self.pathname), text=not self.binary
        )
        mode = 'wb' if self.binary else 'w'
        tmpfp = fdopen(tmpfd, mode, newline=self.newline)
        try:
          yield tmpfp
        finally:
          if not self.write_only:
            self._read_offset = tmpfp.tell()
          tmpfp.close()
          os.rename(tmppath, self.pathname)
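A minimal usage sketch of the class above (the pathname and payload are invented; `SharedAppendFile` is assumed importable from its defining module, e.g. `cs.fileutils`):

f = SharedAppendFile('shared.log')

# append an update under the lock file; the written span is recorded
# in _read_skip so that our own data is not handed back by __iter__
with f.open() as wfp:
  wfp.write('an update from this process\n')

# read whatever other processes have appended so far
for chunk in f:
  print('foreign update:', chunk)

f.close()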
class Inodes:
  ''' Inode information for a filesystem.

      This consists of:
      - a Range denoting allocated inode numbers
      - a mapping of inode numbers to Inodes
      - a mapping of UUIDs to Inodes
      - a mapping of Dirents to Inodes

      Once an Inode is allocated it will have a reference by inum
      and Dirent. Since a Dirent need not have a UUID, it may not
      be mapped by UUID. The UUID map will be updated if `.add` is
      called later when the Dirent has a UUID, and clients should
      call `.add` to ensure that mapping if they rely on a Dirent's
      UUID, such as when making an IndirectDirent.
  '''

  def __init__(self, fs):
    self.fs = fs                # main filesystem
    self._allocated = Range()   # range of allocated inode numbers
    self._by_inum = {}
    self._by_uuid = {}
    self._by_dirent = {}
    self._lock = RLock()

  def load_fs_inode_dirents(self, D):
    ''' Load entries from an `fs_inode_dirents` Dir into the Inode table.
    '''
    X("LOAD FS INODE DIRENTS:")
    dump_Dirent(D)
    for name, E in D.entries.items():
      X("  name=%r, E=%r", name, E)
      with Pfx(name):
        # get the refcount from the "uuid:refcount" name
        _, refcount_s = name.split(':')[:2]
        I = self.add(E)
        I.refcount = int(refcount_s)
        X("  I=%s", I)

  def get_fs_inode_dirents(self):
    ''' Create an `fs_inode_dirents` Dir containing Inodes which
        should be preserved across mounts.
    '''
    D = Dir('fs_inode_dirents')
    for uuid, I in sorted(self._by_uuid.items()):
      if I.refcount > 0:
        D["%s:%d" % (uuid, I.refcount)] = I.E
      else:
        warning("refcount=%s, SKIP %s", I.refcount, I.E)
    X("GET FS INODE DIRENTS:")
    dump_Dirent(D)
    return D

  def _new_inum(self):
    ''' Allocate a new Inode number.
    '''
    allocated = self._allocated
    if allocated:
      # extend the lowest contiguous span of allocated numbers
      span0 = allocated.span0
      inum = span0.end
    else:
      inum = 1
    allocated.add(inum)
    return inum

  def add(self, E, inum=None):
    ''' Add the Dirent `E` to the Inodes, return the new Inode.
        It is not an error to add the same Dirent more than once.
    '''
    with Pfx("Inodes.add(E=%s)", E):
      if E.isindirect:
        raise ValueError("indirect Dirents may not become Inodes")
      if inum is not None and inum < 1:
        raise ValueError("inum must be >= 1, got: %d" % (inum,))
      uu = E.uuid
      I = self._by_dirent.get(E)
      if I:
        assert I.E is E
        if inum is not None and I.inum != inum:
          raise ValueError(
              "inum=%d: Dirent already has an Inode with a different inum: %s"
              % (inum, I)
          )
        if uu:
          # opportunistically update the UUID mapping
          # in case the Dirent has acquired a UUID
          I2 = self._by_uuid.get(uu)
          if I2:
            assert I2.E is E
          else:
            self._by_uuid[uu] = I
        return I
      # unknown Dirent, create a new Inode
      if inum is None:
        inum = self._new_inum()
      else:
        I = self._by_inum.get(inum)
        if I:
          raise ValueError("inum %d already allocated: %s" % (inum, I))
        self._allocated.add(inum)
      I = Inode(inum, E)
      self._by_dirent[E] = I
      self._by_inum[inum] = I
      if uu:
        self._by_uuid[uu] = I
      return I

  def __getitem__(self, ndx):
    if isinstance(ndx, int):
      try:
        I = self._by_inum[ndx]
      except KeyError as e:
        raise IndexError("unknown inode number %d: %s" % (ndx, e))
      return I
    if isinstance(ndx, UUID):
      return self._by_uuid[ndx]
    if isinstance(ndx, _Dirent):
      return self._by_dirent[ndx]
    raise TypeError("cannot dereference index of type %r" % (type(ndx),))

  def __contains__(self, ndx):
    try:
      _ = self[ndx]
    except (KeyError, IndexError):
      return False
    return True
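`Inodes._new_inum` leans on `Range.span0` for its allocation policy: extend the lowest contiguous run of allocated inode numbers, starting from 1. A small standalone sketch of just that policy, using `cs.range.Range` directly (values illustrative):

from cs.range import Range

allocated = Range()

def new_inum():
  # same policy as Inodes._new_inum: extend the lowest contiguous
  # span of allocated numbers, starting from 1 when empty
  if allocated:
    inum = allocated.span0.end
  else:
    inum = 1
  allocated.add(inum)
  return inum

print([new_inum() for _ in range(3)])  # [1, 2, 3]
allocated.add(10)   # simulate an inum picked explicitly via add(E, inum=10)
print(new_inum())   # 4: still extends the lowest span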
  def test11equals(self):
    R1 = Range(self.items1)
    self.assertEqual(R1, R1)
    self.assertEqual(list(iter(R1)), sorted(self.items1))

  def test10init(self):
    R0 = Range()
    R0._check()
    self.assertEqual(list(R0.spans()), [])
    R0.update(self.items1)
    R0._check()
    self.assertEqual(list(R0.spans()), self.spans1)
    R1 = Range(self.items1)
    R1._check()
    self.assertEqual(list(R1.spans()), self.spans1)
    self.assertEqual(R0, R1)
    R2 = Range(self.items2)
    R2._check()
    self.assertEqual(list(R2.spans()), self.spans2)

  def test17difference_subset_superset(self):
    R1 = Range(self.items1)
    R1._check()
    R2 = Range(self.items2)
    R2._check()
    R3 = R1.difference(R2)
    R3._check()
    self.assertEqual(list(R3), self.items1minus2)
    self.assertEqual(list(R3.spans()), self.spans1minus2)
    self.assertTrue(R1.issuperset(R3))
    self.assertTrue(R3.issubset(R1))
    # the - operator should behave like difference
    R4 = R1 - R2
    R4._check()
    self.assertEqual(list(R4), self.items1minus2)
    self.assertEqual(list(R4.spans()), self.spans1minus2)
    self.assertTrue(R1.issuperset(R4))
    self.assertTrue(R4.issubset(R1))

  def test15discard(self):
    R1 = Range(self.items1)
    R1._check()
    R2 = Range(self.items2)
    R2._check()
    R1.discard(R2)
    R1._check()
    self.assertEqual(list(R1), self.items1minus2)
    self.assertEqual(list(R1.spans()), self.spans1minus2)
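These last two tests exercise the same set arithmetic through two spellings: `difference` and the `-` operator build a new `Range`, while `discard` mutates in place. A quick illustrative sketch (values invented):

from cs.range import Range

R1 = Range([1, 2, 3, 4])
R2 = Range([3, 4, 5])

R3 = R1 - R2       # a new Range; R1 is unchanged
print(list(R3))    # [1, 2]
print(list(R1))    # [1, 2, 3, 4]

R1.discard(R2)     # mutates R1 in place
print(list(R1))    # [1, 2]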