import shutil
from os import posix_fadvise, POSIX_FADV_DONTNEED


def copyfileobj(fsrc, fdst, length=16 * 1024, advise_after=1024 * 1024):
    """
    Reimplementation of shutil.copyfileobj that advises the OS to remove
    parts of the source file from the OS's caches once copied to the
    destination file.

    Usage profile:

    * You have a (potentially) large file to copy.
    * You know you don't need to access the source file once copied.
    * You're quite likely to access the destination file soon after.
    """
    # If we can't access the fileno then fall back to using shutil.
    if not hasattr(fsrc, 'fileno'):
        return shutil.copyfileobj(fsrc, fdst, length)
    # Calculate the approximate number of blocks to copy before advising
    # the OS to drop pages from the cache (at least one, so the modulo
    # check below can never divide by zero).
    advise_after_blocks = max(1, int(advise_after / length))
    # Off we go ...
    blocks_read = 0
    while True:
        data = fsrc.read(length)
        if not data:
            break
        fdst.write(data)
        blocks_read += 1
        if not blocks_read % advise_after_blocks:
            posix_fadvise(fsrc.fileno(), 0, length * blocks_read,
                          POSIX_FADV_DONTNEED)
    # One final advise to flush the remaining blocks.
    posix_fadvise(fsrc.fileno(), 0, 0, POSIX_FADV_DONTNEED)
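
# A minimal usage sketch for copyfileobj() above; the file names are
# hypothetical. After the copy, the source's pages have been dropped
# from the OS page cache while the destination stays warm for readers.
with open('big_input.bin', 'rb') as fsrc, open('big_output.bin', 'wb') as fdst:
    copyfileobj(fsrc, fdst)
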
def update_many(path, points):
    """update_many(path, points)

    path is a string
    points is a list of (timestamp, value) points
    """
    if not points:
        return
    points = [(int(t), float(v)) for (t, v) in points]
    # Order points by timestamp, newest first
    points.sort(key=lambda p: p[0], reverse=True)
    with open(path, 'r+b') as fh:
        if CAN_FADVISE and FADVISE_RANDOM:
            posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
        return file_update_many(fh, points)

def update(path, value, timestamp=None):
    """update(path, value, timestamp=None)

    path is a string
    value is a float
    timestamp is either an int or float
    """
    value = float(value)
    with open(path, 'r+b') as fh:
        if CAN_FADVISE and FADVISE_RANDOM:
            posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
        return file_update(fh, value, timestamp)

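
# A minimal usage sketch for update() and update_many(); the path and
# timestamps are hypothetical, and both calls assume the whisper file
# already exists (see create() below).
import time

now = int(time.time())
update('/opt/graphite/storage/whisper/test.wsp', 42.0, timestamp=now)
update_many('/opt/graphite/storage/whisper/test.wsp',
            [(now - 60, 1.0), (now - 120, 2.0)])
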
import os
import sys

import fadvise


def main():
    orig_file = sys.argv[1]
    new_model = sys.argv[2]  # printf-style template for chunk file names
    # Start writing chunks at this index; earlier chunks are skipped over.
    start_chunk = int(sys.argv[3]) if len(sys.argv) > 3 else 0
    forig = open(orig_file, 'rb')
    prev_pos = 0
    new_num = 0
    # stdin supplies one byte offset per line, marking the chunk boundaries.
    for line in sys.stdin:
        pos = int(line.strip())
        diff = pos - prev_pos
        assert diff >= 0
        if new_num >= start_chunk:
            filename = new_model % (new_num)
            with open(filename, 'wb') as fdest:
                # Drop the already-copied part of the source from the page
                # cache, then hint sequential access for the rest of the
                # source and for the new destination file.
                fadvise.posix_fadvise(forig.fileno(), 0, forig.tell(),
                                      fadvise.POSIX_FADV_DONTNEED)
                fadvise.posix_fadvise(forig.fileno(), forig.tell(), 0,
                                      fadvise.POSIX_FADV_SEQUENTIAL)
                fadvise.posix_fadvise(fdest.fileno(), 0, 0,
                                      fadvise.POSIX_FADV_SEQUENTIAL)
                copy(forig, fdest, diff)
        else:
            forig.seek(diff, os.SEEK_CUR)
        prev_pos = pos
        new_num += 1
    # Copy the last chunk; copy() is a helper defined elsewhere, and a
    # length of 0 is taken to mean "copy through to EOF".
    with open(new_model % (new_num), 'wb') as fdest:
        copy(forig, fdest, 0)

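
# A hypothetical invocation of the splitter above (the script and file
# names are assumptions): offsets arrive on stdin, one per line, e.g.
#
#     python split_model.py model.bin 'chunk_%03d.bin' 2 < offsets.txt
#
# which skips chunks 0 and 1 and writes chunk_002.bin onward.
if __name__ == '__main__':
    main()
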
def file_contents(fn, offset=None, length=None, binary=False):
    """Return the entire contents of a file, or of a specified segment.

    Open the file in binary mode if specified.

    >>> file_contents('/dev/null')
    ''
    """
    mode = 'rb' if binary else 'r'
    with open(fn, mode) as f:
        if offset is not None:
            return file_block_at_offset(f, offset, length)
        if length is not None:
            s = f.read(length)
            assert len(s) == length
            return s
        s = f.read()
        # Tell the OS it can discard these pages, because the aggregate
        # size of the spectrum files may be huge, and we want to avoid
        # pushing more important stuff out of memory.
        if fn.endswith('.ms2'):
            posix_fadvise(f.fileno(), 0, 0, POSIX_FADV_DONTNEED)
        return s

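
# A minimal usage sketch for file_contents(); the file name is a
# hypothetical example. Reading a whole .ms2 file also evicts its pages
# from the OS cache once the contents are returned.
data = file_contents('spectra/sample.ms2', binary=True)
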
def create(path, archiveList, xFilesFactor=None, aggregationMethod=None,
           sparse=False, useFallocate=False):
    """create(path, archiveList, xFilesFactor=0.5, aggregationMethod='average')

    path is a string
    archiveList is a list of archives, each of which is of the form
    (secondsPerPoint, numberOfPoints)
    xFilesFactor specifies the fraction of data points in a propagation
    interval that must have known values for a propagation to occur
    aggregationMethod specifies the function to use when propagating data
    (see ``whisper.aggregationMethods``)
    """
    # Set default params
    if xFilesFactor is None:
        xFilesFactor = 0.5
    if aggregationMethod is None:
        aggregationMethod = 'average'

    # Validate archive configurations...
    validateArchiveList(archiveList)

    # Looks good, now we create the file and write the header
    if os.path.exists(path):
        raise InvalidConfiguration("File %s already exists!" % path)

    with open(path, 'wb') as fh:
        try:
            if LOCK:
                fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
            if CAN_FADVISE and FADVISE_RANDOM:
                posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)

            aggregationType = struct.pack(
                longFormat, aggregationMethodToType.get(aggregationMethod, 1))
            oldest = max([secondsPerPoint * points
                          for secondsPerPoint, points in archiveList])
            maxRetention = struct.pack(longFormat, oldest)
            xFilesFactor = struct.pack(floatFormat, float(xFilesFactor))
            archiveCount = struct.pack(longFormat, len(archiveList))
            packedMetadata = (aggregationType + maxRetention + xFilesFactor +
                              archiveCount)
            fh.write(packedMetadata)
            headerSize = metadataSize + (archiveInfoSize * len(archiveList))
            archiveOffsetPointer = headerSize

            for secondsPerPoint, points in archiveList:
                archiveInfo = struct.pack(archiveInfoFormat,
                                          archiveOffsetPointer,
                                          secondsPerPoint, points)
                fh.write(archiveInfo)
                archiveOffsetPointer += (points * pointSize)

            # If configured to use fallocate and capable of fallocate, use
            # that; otherwise attempt a sparse file if configured, or zero
            # pre-allocate if sparse isn't configured.
            if CAN_FALLOCATE and useFallocate:
                remaining = archiveOffsetPointer - headerSize
                fallocate(fh, headerSize, remaining)
            elif sparse:
                fh.seek(archiveOffsetPointer - 1)
                fh.write(b'\x00')
            else:
                remaining = archiveOffsetPointer - headerSize
                chunksize = 16384
                zeroes = b'\x00' * chunksize
                while remaining > chunksize:
                    fh.write(zeroes)
                    remaining -= chunksize
                fh.write(zeroes[:remaining])

            if AUTOFLUSH:
                fh.flush()
                os.fsync(fh.fileno())
            # Explicitly close the file to catch IOError on close()
            fh.close()
        except IOError:
            # If we got an IOError above, the file is either empty or half
            # created. Better off deleting it to avoid surprises later.
            os.unlink(fh.name)
            raise
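
# A minimal usage sketch for create(); the path and retention scheme are
# hypothetical: one-minute points for a day, then five-minute points for
# a week (60 * 1440 = 86400 s; 300 * 2016 = 604800 s).
create('/opt/graphite/storage/whisper/test.wsp',
       [(60, 1440), (300, 2016)],
       xFilesFactor=0.5, aggregationMethod='average')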