Пример #1
0
 def test16remove(self):
     ''' Removing an absent element raises KeyError. '''
     rng = Range(self.items1)
     rng._check()
     rng.remove(3)
     rng._check()
     with self.assertRaises(KeyError):
         rng.remove(3)
     rng._check()
Пример #2
0
 def __init__(self, fs):
   ''' Initialise the Inode tables for the filesystem `fs`. '''
   self.fs = fs  # main filesystem
   self._allocated = Range()  # range of allocated inode numbers
   self._by_inum = {}  # inode number -> Inode
   self._by_uuid = {}  # Dirent UUID -> Inode
   self._by_dirent = {}  # Dirent -> Inode
   self._lock = RLock()  # reentrant lock; presumably guards the tables above -- TODO confirm usage
Пример #3
0
 def test12copy(self):
     ''' A copy is a distinct object, equal to the original,
         with equal internal and reported spans.
     '''
     original = Range(self.items1)
     duplicate = original.copy()
     duplicate._check()
     self.assertTrue(original is not duplicate, "R1 is R2")
     self.assertEqual(original, duplicate)
     self.assertEqual(original._spans, duplicate._spans)
     self.assertEqual(list(original.spans()), list(duplicate.spans()))
Пример #4
0
 def test13update00fromItems(self):
     ''' update() from an iterable of items produces the expected union. '''
     rng = Range(self.items1)
     rng._check()
     rng.update(self.items2)
     rng._check()
     self.assertEqual(list(rng), self.items1plus2)
     self.assertEqual(list(rng.spans()), self.spans1plus2)
Пример #5
0
 def test13update01fromSpans(self):
     ''' Adding spans one at a time produces the expected union. '''
     rng = Range(self.items1)
     rng._check()
     for span in self.spans2:
         rng.add_span(span[0], span[1])
         rng._check()
     self.assertEqual(list(rng), self.items1plus2)
     self.assertEqual(list(rng.spans()), self.spans1plus2)
Пример #6
0
 def test14union(self):
     ''' union() of two Ranges yields the precomputed items and spans. '''
     R1 = Range(self.items1)
     R1._check()
     R2 = Range(self.items2)
     R2._check()
     R3 = R1.union(R2)
     R3._check()
     self.assertEqual(list(R3), self.items1plus2)
     # was list(list(R3.spans())): the inner list() was redundant
     self.assertEqual(list(R3.spans()), self.spans1plus2)
Пример #7
0
    def __init__(self,
                 pathname,
                 read_only=False,
                 write_only=False,
                 binary=False,
                 newline=None,
                 lock_ext=None,
                 lock_timeout=None,
                 poll_interval=None):
        ''' Initialise this SharedAppendFile.

        Parameters:
        * `pathname`: the pathname of the file to open.
        * `read_only`: set to true if we will not write updates.
        * `write_only`: set to true if we will not read updates.
        * `binary`: if the file is to be opened in binary mode, otherwise text mode.
        * `newline`: passed to `open()`
        * `lock_ext`: lock file extension.
        * `lock_timeout`: maximum time to wait for obtaining the lock file.
        * `poll_interval`: poll time when taking a lock file,
          default `DEFAULT_POLL_INTERVAL`
        '''
        with Pfx("SharedAppendFile(%r): __init__", pathname):
            if poll_interval is None:
                poll_interval = DEFAULT_POLL_INTERVAL
            self.pathname = abspath(pathname)
            self.binary = binary
            self.newline = newline
            self.read_only = read_only
            self.write_only = write_only
            self.lock_ext = lock_ext
            self.lock_timeout = lock_timeout
            self.poll_interval = poll_interval
            # choose open flags: read only, append only, or read/append;
            # read_only and write_only are mutually exclusive
            if self.read_only:
                if self.write_only:
                    raise ValueError(
                        "only one of read_only and write_only may be true")
                o_flags = O_RDONLY
            elif self.write_only:
                o_flags = O_WRONLY | O_APPEND
            else:
                o_flags = O_RDWR | O_APPEND
            self._fd = os.open(self.pathname, o_flags)
            self._rfp = None  # read file, opened by _readopen()
            self._read_offset = 0  # shared read pointer for __iter__
            self._read_skip = Range()  # spans written by this instance, skipped on read
            self._readlock = RLock()
            if not self.write_only:
                self._readopen()
            self.closed = False
Пример #8
0
 def test17symmetric_difference(self):
     ''' symmetric_difference() and the ^ operator yield equal,
         distinct Ranges with the precomputed items and spans.
     '''
     R1 = Range(self.items1)
     R1._check()
     R2 = Range(self.items2)
     R2._check()
     R3 = R1.symmetric_difference(R2)
     R3._check()
     self.assertEqual(list(R3), self.items1xor2)
     # was list(list(...)): the inner list() was redundant
     self.assertEqual(list(R3.spans()), self.spans1xor2)
     R4 = R1 ^ R2
     R4._check()
     self.assertEqual(list(R4), self.items1xor2)
     self.assertEqual(list(R4.spans()), self.spans1xor2)
     self.assertEqual(R4, R3)
     self.assertTrue(R4 is not R3, "R4 is R3")
Пример #9
0
 def test30random_set_equivalence(self):
     ''' Random adds and discards keep a Range equal to a plain set. '''
     rng = Range()
     model = set()
     self.assertEqual(model, set(rng))
     for _ in range(100):
         value = randint(0, 99)
         if randint(0, 1):
             rng.add(value)
             model.add(value)
         else:
             rng.discard(value)
             model.discard(value)
         self.assertEqual(model, set(rng))
Пример #10
0
class SharedAppendFile(object):
    ''' A base class to share a modifiable file between multiple users.

      The use case was driven from the shared CSV files used by
      `cs.nodedb.csvdb.Backend_CSVFile`, where multiple users can
      read from a common CSV file, and coordinate updates with a
      lock file.

      This presents the following interfaces:
      * `__iter__`: yields data chunks from the underlying file up
        to EOF; it blocks no more than reading from the file does.
        Note that multiple iterators share the same read pointer.

      * `open`: a context manager returning a writable file for writing
        updates to the file; it blocks reads from this instance
        (though not, of course, by other users of the file) and
        arranges that users of `__iter__` do not receive their own
        written data, thus arranging that `__iter__` returns only
        foreign file updates.

      Subclasses would normally override `__iter__` to parse the
      received data into their natural records.
  '''

    def __init__(self,
                 pathname,
                 read_only=False,
                 write_only=False,
                 binary=False,
                 newline=None,
                 lock_ext=None,
                 lock_timeout=None,
                 poll_interval=None):
        ''' Initialise this SharedAppendFile.

        Parameters:
        * `pathname`: the pathname of the file to open.
        * `read_only`: set to true if we will not write updates.
        * `write_only`: set to true if we will not read updates.
        * `binary`: if the file is to be opened in binary mode, otherwise text mode.
        * `newline`: passed to `open()`
        * `lock_ext`: lock file extension.
        * `lock_timeout`: maximum time to wait for obtaining the lock file.
        * `poll_interval`: poll time when taking a lock file,
          default `DEFAULT_POLL_INTERVAL`
        '''
        with Pfx("SharedAppendFile(%r): __init__", pathname):
            if poll_interval is None:
                poll_interval = DEFAULT_POLL_INTERVAL
            self.pathname = abspath(pathname)
            self.binary = binary
            self.newline = newline
            self.read_only = read_only
            self.write_only = write_only
            self.lock_ext = lock_ext
            self.lock_timeout = lock_timeout
            self.poll_interval = poll_interval
            # choose open flags: read only, append only, or read/append;
            # read_only and write_only are mutually exclusive
            if self.read_only:
                if self.write_only:
                    raise ValueError(
                        "only one of read_only and write_only may be true")
                o_flags = O_RDONLY
            elif self.write_only:
                o_flags = O_WRONLY | O_APPEND
            else:
                o_flags = O_RDWR | O_APPEND
            self._fd = os.open(self.pathname, o_flags)
            self._rfp = None  # read file, opened by _readopen()
            self._read_offset = 0  # shared read pointer for __iter__
            self._read_skip = Range()  # spans written by this instance, skipped on read
            self._readlock = RLock()
            if not self.write_only:
                self._readopen()
            self.closed = False

    def __str__(self):
        return "SharedAppendFile(%r)" % (self.pathname, )

    def close(self):
        ''' Close the SharedAppendFile: close input queue, wait for monitor to terminate.
        '''
        if self.closed:
            warning("multiple close of %s", self)
        self.closed = True

    def _readopen(self):
        ''' Open the file for read, via a dup of the master descriptor.
        '''
        assert not self.write_only
        mode = 'rb' if self.binary else 'r'
        fd = dup(self._fd)
        if self.binary:
            buffering = 0
        else:
            buffering = -1
        # NOTE(review): any previously opened self._rfp is dropped
        # without close here -- possible descriptor leak; preserved as-is.
        self._rfp = fdopen(fd, mode, buffering=buffering, newline=self.newline)
        self.open_state = self.filestate

    def _readclose(self):
        ''' Close the reader.
        '''
        assert not self.write_only
        rfp = self._rfp
        self._rfp = None
        rfp.close()
        self.closed = True

    def __iter__(self):
        ''' Iterate over the file, yielding data chunks until EOF.

        This skips data written to the file by this instance so that
        the data chunks returned are always foreign updates.
        Note that all iterators share the same file offset pointer.

        Usage:

            for chunk in f:
                ... process chunk ...
        '''
        assert not self.write_only
        while True:
            with self._readlock:
                # advance over any skip areas
                offset = self._read_offset
                skip = self._read_skip
                while skip and skip.start <= offset:
                    start0, end0 = skip.span0
                    if offset < end0:
                        offset = end0
                    skip.discard(start0, end0)
                read_size = DEFAULT_READSIZE
                if skip:
                    # do not read past the start of the next skip span
                    read_size = min(read_size, skip.span0.start - offset)
                    assert read_size > 0
                # gather data
                self._rfp.seek(offset)
                bs = self._rfp.read(read_size)
                self._read_offset = self._rfp.tell()
            if not bs:
                break
            yield bs

    def _lockfile(self):
        ''' Obtain an exclusive write lock on the CSV file.
        This arranges that multiple instances can coordinate writes.

        Usage:

            with self._lockfile():
                ... write data ...
        '''
        return lockfile(self.pathname,
                        ext=self.lock_ext,
                        poll_interval=self.poll_interval,
                        timeout=self.lock_timeout)

    @contextmanager
    def open(self):
        ''' Open the file for append write, returning a writable file.
        Iterators are blocked for the duration of the context manager.
        '''
        if self.read_only:
            raise RuntimeError(
                "attempt to write to read only SharedAppendFile")
        with self._lockfile():
            with self._readlock:
                mode = 'ab' if self.binary else 'a'
                fd = dup(self._fd)
                with fdopen(fd, mode, newline=self.newline) as wfp:
                    wfp.seek(0, SEEK_END)
                    start = wfp.tell()
                    yield wfp
                    end = wfp.tell()
                # record our own write so __iter__ skips it
                if end > start:
                    self._read_skip.add(start, end)
                if not self.write_only:
                    self._readopen()

    def tail(self):
        ''' A generator returning data chunks from the file indefinitely.

        This supports writing monitors for file updates.
        Note that this, like other iterators, shares the same file offset pointer.
        Also note that it calls the class' iterator, so that if a
        subclass returns higher level records from its iterator,
        those records will also be returned from tail.

        Usage:

            for chunk in f:
                ... process chunk ...
        '''
        while True:
            for item in self:
                yield item
            if self.closed:
                return
            time.sleep(DEFAULT_TAIL_PAUSE)

    @property
    def filestate(self):
        ''' The current FileState of the backing file.
        '''
        fd = self._fd
        if fd is None:
            return None
        return FileState(fd)

    # TODO: need to notice filestate changes in other areas
    # TODO: support in place rewrite?
    @contextmanager
    def rewrite(self):
        ''' Context manager for rewriting the file.

        This writes data to a new file which is then renamed onto the original.
        After the switch, the read pointer is set to the end of the new file.

        Usage:

            with f.rewrite() as wfp:
                ... write data to wfp ...
        '''
        with self._readlock:
            with self.open() as _:
                # BUGFIX: mkstemp returns an (fd, pathname) pair, not a
                # file object; the original code called .tell()/.close()
                # on the pair and passed it to os.rename().
                # (Its text=self.binary argument was also inverted:
                # mkstemp's text parameter means "text mode".)
                tmpfd, tmppath = mkstemp(dir=dirname(self.pathname))
                tmpfp = fdopen(
                    tmpfd, 'wb' if self.binary else 'w', newline=self.newline)
                try:
                    yield tmpfp
                finally:
                    if not self.write_only:
                        self._read_offset = tmpfp.tell()
                    tmpfp.close()
                    os.rename(tmppath, self.pathname)
Пример #11
0
class Inodes:
  ''' Inode information for a filesystem.

      This consists of:
      - a Range denoting allocated inode numbers
      - a mapping of inode numbers to Inodes
      - a mapping of UUIDs to Inodes
      - a mapping of Dirents to Inodes

      Once an Inode is allocated it will have a reference by inum
      and Dirent. Since a Dirent need not have a UUID, it may not
      be mapped by UUID. The UUID map will be updated if `.add` is
      called later when the Dirent has a UUID, and clients should
      call `.add` to ensure that mapping if they rely on a Dirent's
      UUID, such as when making an IndirectDirent.
  '''

  def __init__(self, fs):
    ''' Initialise the Inode tables for the filesystem `fs`. '''
    self.fs = fs  # main filesystem
    self._allocated = Range()  # range of allocated inode numbers
    self._by_inum = {}  # inode number -> Inode
    self._by_uuid = {}  # Dirent UUID -> Inode
    self._by_dirent = {}  # Dirent -> Inode
    self._lock = RLock()  # reentrant lock; presumably guards the tables -- TODO confirm usage

  def load_fs_inode_dirents(self, D):
    ''' Load entries from an `fs_inode_dirents` Dir into the Inode table.

        Each entry name has the form "uuid:refcount", as written by
        `get_fs_inode_dirents`; the refcount is restored onto the
        newly added Inode.
    '''
    X("LOAD FS INODE DIRENTS:")
    dump_Dirent(D)
    for name, E in D.entries.items():
      X("  name=%r, E=%r", name, E)
      with Pfx(name):
        # get the refcount from the "uuid:refcount" name
        _, refcount_s = name.split(':')[:2]
        I = self.add(E)
        I.refcount = int(refcount_s)
        X("  I=%s", I)

  def get_fs_inode_dirents(self):
    ''' Create an `fs_inode_dirents` Dir containing Inodes which
        should be preserved across mounts.
    '''
    D = Dir('fs_inode_dirents')
    for uuid, I in sorted(self._by_uuid.items()):
      # only persist Inodes still referenced; entry name is "uuid:refcount"
      if I.refcount > 0:
        D["%s:%d" % (uuid, I.refcount)] = I.E
      else:
        warning("refcount=%s, SKIP %s", I.refcount, I.E)
    X("GET FS INODE DIRENTS:")
    dump_Dirent(D)
    return D

  def _new_inum(self):
    ''' Allocate a new Inode number.

        Uses the end of the first allocated span (assuming Range
        spans are half-open, that number is itself unallocated --
        consistent with Range usage elsewhere), or 1 when nothing
        is allocated yet.
    '''
    allocated = self._allocated
    if allocated:
      span0 = allocated.span0
      inum = span0.end
    else:
      inum = 1
    allocated.add(inum)
    return inum

  def add(self, E, inum=None):
    ''' Add the Dirent `E` to the Inodes, return the new Inode.
        It is not an error to add the same Dirent more than once.

        `inum` optionally pins the inode number; it must be >= 1,
        unallocated, and consistent with any existing Inode for `E`.
        Raises ValueError on conflicts or for indirect Dirents.
    '''
    with Pfx("Inodes.add(E=%s)", E):
      if E.isindirect:
        raise ValueError("indirect Dirents may not become Inodes")
      if inum is not None and inum < 1:
        raise ValueError("inum must be >= 1, got: %d" % (inum,))
      uu = E.uuid
      I = self._by_dirent.get(E)
      if I:
        # Dirent already known: validate and return the existing Inode
        assert I.E is E
        if inum is not None and I.inum != inum:
          raise ValueError(
              "inum=%d: Dirent already has an Inode with a different inum: %s"
              % (inum, I)
          )
        if uu:
          # opportunistically update UUID mapping
          # in case the Dirent has acquired a UUID
          I2 = self._by_uuid.get(uu)
          if I2:
            assert I2.E is E
          else:
            self._by_uuid[uu] = I
        return I
      # unknown Dirent, create new Inode
      if inum is None:
        inum = self._new_inum()
      else:
        I = self._by_inum.get(inum)
        if I:
          raise ValueError("inum %d already allocated: %s" % (inum, I))
        self._allocated.add(inum)
      I = Inode(inum, E)
      self._by_dirent[E] = I
      self._by_inum[inum] = I
      if uu:
        self._by_uuid[uu] = I
      return I

  def __getitem__(self, ndx):
    ''' Index an Inode by inode number (int), UUID or Dirent.

        Raises IndexError for an unknown inode number, KeyError for
        an unknown UUID/Dirent, TypeError for other index types.
    '''
    if isinstance(ndx, int):
      try:
        I = self._by_inum[ndx]
      except KeyError as e:
        raise IndexError("unknown inode number %d: %s" % (ndx, e))
      return I
    if isinstance(ndx, UUID):
      return self._by_uuid[ndx]
    if isinstance(ndx, _Dirent):
      return self._by_dirent[ndx]
    raise TypeError("cannot deference indices of type %r" % (type(ndx),))

  def __contains__(self, ndx):
    ''' Test whether `ndx` (inode number, UUID or Dirent) is known. '''
    try:
      _ = self[ndx]
    except (KeyError, IndexError):
      return False
    return True
Пример #12
0
 def test11equals(self):
     ''' A Range equals itself and iterates its items in sorted order. '''
     rng = Range(self.items1)
     self.assertEqual(rng, rng)
     self.assertEqual(list(iter(rng)), sorted(self.items1))
Пример #13
0
 def test10init(self):
     ''' Construction: empty, via update(), and directly from items. '''
     grown = Range()
     grown._check()
     self.assertEqual(list(grown.spans()), [])
     grown.update(self.items1)
     grown._check()
     self.assertEqual(list(grown.spans()), self.spans1)
     direct = Range(self.items1)
     direct._check()
     self.assertEqual(list(direct.spans()), self.spans1)
     self.assertEqual(grown, direct)
     other = Range(self.items2)
     other._check()
     self.assertEqual(list(other.spans()), self.spans2)
Пример #14
0
 def test17difference_subset_superset(self):
     ''' difference() and the - operator yield the precomputed
         items/spans and satisfy issubset()/issuperset().
     '''
     R1 = Range(self.items1)
     R1._check()
     R2 = Range(self.items2)
     R2._check()
     R3 = R1.difference(R2)
     R3._check()
     self.assertEqual(list(R3), self.items1minus2)
     # was list(list(...)): the inner list() was redundant
     self.assertEqual(list(R3.spans()), self.spans1minus2)
     self.assertTrue(R1.issuperset(R3))
     self.assertTrue(R3.issubset(R1))
     R4 = R1 - R2
     R4._check()
     self.assertEqual(list(R4), self.items1minus2)
     self.assertEqual(list(R4.spans()), self.spans1minus2)
     self.assertTrue(R1.issuperset(R4))
     self.assertTrue(R4.issubset(R1))
Пример #15
0
 def test15discard(self):
     ''' discard() of another Range leaves the set difference. '''
     R1 = Range(self.items1)
     R1._check()
     R2 = Range(self.items2)
     R2._check()
     R1.discard(R2)
     R1._check()
     self.assertEqual(list(R1), self.items1minus2)
     # was list(list(...)): the inner list() was redundant
     self.assertEqual(list(R1.spans()), self.spans1minus2)