Example #1
0
    def copyfileobj(fsrc, fdst, length=16*1024, advise_after=1024*1024):
        """
        Reimplementation of shutil.copyfileobj that advises the OS to remove
        parts of the source file from the OS's caches once copied to the
        destination file.

        Usage profile:
            * You have a (potentially) large file to copy.
            * You know you don't need to access the source file once copied.
            * You're quite likely to access the destination file soon after.

        :param fsrc: readable source file object.
        :param fdst: writable destination file object.
        :param length: block size in bytes for each read/write.
        :param advise_after: approximate number of copied bytes between
            successive POSIX_FADV_DONTNEED advisories.
        """
        # If we can't access the fileno then fallback to using shutil.
        if not hasattr(fsrc, 'fileno'):
            return shutil.copyfileobj(fsrc, fdst, length)
        # Calculate the approximate number of blocks to copy before advising
        # the OS to drop pages from the cache.  Clamp to at least 1: the
        # original `int(advise_after/length)` was 0 whenever
        # advise_after < length, and the modulus below then raised
        # ZeroDivisionError.
        advise_after_blocks = max(1, advise_after // length)
        # Off we go ...
        blocks_read = 0
        while True:
            data = fsrc.read(length)
            if not data:
                break
            fdst.write(data)
            blocks_read += 1
            if not blocks_read % advise_after_blocks:
                posix_fadvise(fsrc.fileno(), 0, length*blocks_read,
                              POSIX_FADV_DONTNEED)
        # One final advise to flush the remaining blocks.
        posix_fadvise(fsrc.fileno(), 0, 0, POSIX_FADV_DONTNEED)
def copyfileobj(fsrc, fdst, length=16 * 1024, advise_after=1024 * 1024):
    """
    Reimplementation of shutil.copyfileobj that advises the OS to remove
    parts of the source file from the OS's caches once copied to the
    destination file.

    Usage profile:
        * You have a (potentially) large file to copy.
        * You know you don't need to access the source file once copied.
        * You're quite likely to access the destination file soon after.

    :param fsrc: readable source file object.
    :param fdst: writable destination file object.
    :param length: block size in bytes for each read/write.
    :param advise_after: approximate number of copied bytes between
        successive POSIX_FADV_DONTNEED advisories.
    """

    # If we can't access the fileno then fallback to using shutil.
    if not hasattr(fsrc, 'fileno'):
        return shutil.copyfileobj(fsrc, fdst, length)

    # Calculate the approximate number of blocks to copy before advising the
    # OS to drop pages from the cache.  Clamp to at least 1: the original
    # `int(advise_after / length)` evaluated to 0 whenever
    # advise_after < length, and `blocks_read % 0` below then raised
    # ZeroDivisionError.
    advise_after_blocks = max(1, advise_after // length)

    # Off we go ...
    blocks_read = 0
    while True:
        data = fsrc.read(length)
        if not data:
            break
        fdst.write(data)
        blocks_read += 1

        # Every advise_after_blocks blocks, drop the already-copied span of
        # the source file from the page cache.
        if not blocks_read % advise_after_blocks:
            posix_fadvise(fsrc.fileno(), 0, length * blocks_read,
                          POSIX_FADV_DONTNEED)

    # One final advise to flush the remaining blocks.
    posix_fadvise(fsrc.fileno(), 0, 0, POSIX_FADV_DONTNEED)
Example #3
0
def update_many(path, points):
  """update_many(path,points)

path is a string
points is a list of (timestamp,value) points
"""
  # Nothing to write.
  if not points:
    return
  # Coerce each point to (int timestamp, float value) and order the result
  # newest-first.
  normalised = sorted(
      ((int(t), float(v)) for t, v in points),
      key=lambda pair: pair[0],
      reverse=True)
  with open(path, 'r+b') as fh:
    if CAN_FADVISE and FADVISE_RANDOM:
      # Hint random access to the page cache before touching the archive.
      posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
    return file_update_many(fh, normalised)
Example #4
0
def update(path, value, timestamp=None):
  """
  update(path, value, timestamp=None)

  path is a string
  value is a float
  timestamp is either an int or float
  """
  # Coerce up front so a bad value fails before the file is opened.
  coerced = float(value)
  with open(path, 'r+b') as fh:
    if CAN_FADVISE and FADVISE_RANDOM:
      # Hint random access to the page cache before touching the archive.
      posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
    return file_update(fh, coerced, timestamp)
Example #5
0
def main():
    """Split the file named in argv[1] into chunks at the byte offsets read
    from stdin (one offset per line), writing chunk N to (argv[2] % N).

    argv[3] (optional, default 0) is the first chunk number to actually
    write; chunks before it are skipped by seeking past them in the source.
    """
    orig_file = sys.argv[1]
    new_model = sys.argv[2]
    # Bug fix: start_chunk was only assigned when a third argument was
    # present, yet read unconditionally below — a NameError otherwise.
    start_chunk = int(sys.argv[3]) if len(sys.argv) > 3 else 0

    # Bug fix: the source file was never closed; manage it with a context
    # manager.
    with open(orig_file) as forig:
        prev_pos = 0
        new_num = 0
        for line in sys.stdin:
            pos = int(line.strip())
            diff = pos - prev_pos
            # Offsets must arrive in non-decreasing order.
            assert diff >= 0
            if new_num >= start_chunk:
                with open(new_model % (new_num), 'w') as fdest:
                    # Drop already-copied source pages from the cache, then
                    # hint sequential access for the rest of the copy on both
                    # files.
                    fadvise.posix_fadvise(forig.fileno(), 0, forig.tell(), fadvise.POSIX_FADV_DONTNEED)
                    fadvise.posix_fadvise(forig.fileno(), forig.tell(), 0, fadvise.POSIX_FADV_SEQUENTIAL)
                    fadvise.posix_fadvise(fdest.fileno(), 0, 0, fadvise.POSIX_FADV_SEQUENTIAL)
                    copy(forig, fdest, diff)
            else:
                forig.seek(diff, os.SEEK_CUR)
            prev_pos = pos
            new_num += 1

        # Copy whatever remains of the source as the last chunk.
        with open(new_model % (new_num), 'w') as fdest:
            copy(forig, fdest, 0)
def file_contents(fn, offset=None, length=None, binary=False):
    """Return the entire contents of a file, or of a specified segment.  Open
    the file in binary mode if specified.

    >>> file_contents('/dev/null')
    ''
    """
    with open(fn, 'rb' if binary else 'r') as f:
        # A segment at an explicit offset is delegated entirely.
        if offset is not None:
            return file_block_at_offset(f, offset, length)
        # A length without an offset reads from the start of the file.
        if length is not None:
            data = f.read(length)
            assert len(data) == length
            return data
        data = f.read()

        # tell the OS it can discard these pages, because the aggregate size
        # of the spectrum files may be huge, and we want to avoid pushing more
        # important stuff out of memory
        if fn.endswith('.ms2'):
            posix_fadvise(f.fileno(), 0, 0, POSIX_FADV_DONTNEED)

        return data
Example #7
0
def create(path, archiveList, xFilesFactor=None, aggregationMethod=None, sparse=False, useFallocate=False):
  """create(path,archiveList,xFilesFactor=0.5,aggregationMethod='average')

path is a string
archiveList is a list of archives, each of which is of the form (secondsPerPoint,numberOfPoints)
xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur
aggregationMethod specifies the function to use when propagating data (see ``whisper.aggregationMethods``)
sparse, if true, seeks past the archive region and writes a single zero byte instead of zero-filling it
useFallocate, if true and fallocate is available, preallocates the archive region with fallocate()

Raises InvalidConfiguration if the file already exists.
"""
  # Set default params
  if xFilesFactor is None:
    xFilesFactor = 0.5
  if aggregationMethod is None:
    aggregationMethod = 'average'

  # Validate archive configurations...
  validateArchiveList(archiveList)

  # Looks good, now we create the file and write the header
  if os.path.exists(path):
    raise InvalidConfiguration("File %s already exists!" % path)

  with open(path, 'wb') as fh:
    try:
      if LOCK:
        # Hold an exclusive lock while laying out the file.
        fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
      if CAN_FADVISE and FADVISE_RANDOM:
        # Hint random access to the page cache.
        posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)

      # Pack the fixed-size metadata header fields:
      # aggregation type, max retention, xFilesFactor, archive count.
      # Unknown aggregation methods fall back to type 1.
      aggregationType = struct.pack(longFormat, aggregationMethodToType.get(aggregationMethod, 1))
      # Longest retention (seconds) across all configured archives.
      oldest = max([secondsPerPoint * points for secondsPerPoint, points in archiveList])
      maxRetention = struct.pack(longFormat, oldest)
      xFilesFactor = struct.pack(floatFormat, float(xFilesFactor))
      archiveCount = struct.pack(longFormat, len(archiveList))
      packedMetadata = aggregationType + maxRetention + xFilesFactor + archiveCount
      fh.write(packedMetadata)
      headerSize = metadataSize + (archiveInfoSize * len(archiveList))
      # Data for the first archive starts immediately after the header;
      # each subsequent archive follows the previous one's data.
      archiveOffsetPointer = headerSize

      # Write one archive-info record per archive: (offset, resolution, size).
      for secondsPerPoint, points in archiveList:
        archiveInfo = struct.pack(archiveInfoFormat, archiveOffsetPointer, secondsPerPoint, points)
        fh.write(archiveInfo)
        archiveOffsetPointer += (points * pointSize)

      # If configured to use fallocate and capable of fallocate use that, else
      # attempt sparse if configure or zero pre-allocate if sparse isn't configured.
      if CAN_FALLOCATE and useFallocate:
        remaining = archiveOffsetPointer - headerSize
        fallocate(fh, headerSize, remaining)
      elif sparse:
        # Seek to the last byte and write one zero, leaving the rest unwritten.
        fh.seek(archiveOffsetPointer - 1)
        fh.write(b'\x00')
      else:
        # Zero-fill the whole archive region in fixed-size chunks.
        remaining = archiveOffsetPointer - headerSize
        chunksize = 16384
        zeroes = b'\x00' * chunksize
        while remaining > chunksize:
          fh.write(zeroes)
          remaining -= chunksize
        fh.write(zeroes[:remaining])

      if AUTOFLUSH:
        fh.flush()
        os.fsync(fh.fileno())
      # Explicitly close the file to catch IOError on close()
      fh.close()
    except IOError:
      # if we got an IOError above, the file is either empty or half created.
      # Better off deleting it to avoid surprises later
      os.unlink(fh.name)
      raise