def _set_readahead_hints(self, roi, open_files):
    if not hasattr(os, 'posix_fadvise'):
        return
    if any(f.handle.fileno() is None for f in open_files):
        return
    for f in open_files:
        os.posix_fadvise(f.handle.fileno(), 0, 0, os.POSIX_FADV_WILLNEED)
def __init__(self, reppath, context, ohash_log, refcount_log):
    self.context = context
    # Create a new opportunistic hash collection.
    self.col = opportunistic_hash.OpportunisticHashCollection(
        carvpathcontext=context, ohash_log=ohash_log)
    # We start off with zero open files.
    self.openfiles = {}
    # Open the underlying data file, creating it if needed.
    self.fd = os.open(
        reppath,
        (os.O_RDWR | os.O_LARGEFILE | os.O_NOATIME | os.O_CREAT))
    # Get the current repository total size.
    cursize = os.lseek(self.fd, 0, os.SEEK_END)
    # Mark the entire repository as DONTNEED and assume everything is
    # cold data for now.
    posix_fadvise(self.fd, 0, cursize, POSIX_FADV_DONTNEED)
    # Create a CarvPath top entity of the proper size.
    self.top = self.context.make_top(size=cursize)
    # Create an fadvise functor from the fd.
    fadvise = _FadviseFunctor(fd=self.fd)
    # Create a reference-counting carvpath stack using our fadvise
    # functor and ohash collection.
    self.stack = refcount_stack.CarvpathRefcountStack(
        carvpathcontext=self.context,
        fadvise=fadvise,
        ohashcollection=self.col,
        refcount_log=refcount_log)
def _read(fd, fn, sequential, direct):
    try:
        if direct:
            if sequential is not None:
                fadv_sequential = os.POSIX_FADV_SEQUENTIAL
                fadv_random = os.POSIX_FADV_RANDOM
                advice = fadv_sequential if sequential else fadv_random
                os.posix_fadvise(fd, 0, 0, advice)

            def read(buf):
                data = fn(fd, buf)
                os.posix_fadvise(fd, read.offset, buf, os.POSIX_FADV_DONTNEED)
                read.offset += buf
                return data

            # NOTE: the `nonlocal` statement is not available in Python 2,
            # so the running offset is kept as a function attribute.
            read.offset = 0
        else:
            raise AttributeError
    except AttributeError:
        # Either `direct` was falsy or os.posix_fadvise is unavailable;
        # fall back to a plain read with no cache hints.
        def read(buf):
            return fn(fd, buf)
    return read, fd
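# A hedged usage sketch for the _read factory above, assuming os.read as
# the underlying read function and an illustrative path: it shows the
# drop-behind pattern, where each chunk read is followed by a DONTNEED
# hint for the pages behind the running offset so a large scan does not
# evict warmer data from the page cache.
import os

fd = os.open('/var/tmp/big.dat', os.O_RDONLY)  # hypothetical input file
read, fd = _read(fd, os.read, sequential=True, direct=True)
while True:
    chunk = read(1 << 20)  # 1 MiB per call; pages behind us are dropped
    if not chunk:
        break
os.close(fd)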
def _prefetch_for_tile(self, fileset, tile_ranges):
    prefr = _get_prefetch_ranges(len(fileset), tile_ranges)
    prefr = prefr[~np.all(prefr == 0, axis=1)]
    for mi, ma, fidx in prefr:
        f = fileset[fidx]
        os.posix_fadvise(f.handle.fileno(), mi, ma - mi,
                         os.POSIX_FADV_WILLNEED)
def raw_baseband_frames(file_name: str, buf: bytearray):
    """Iterate over frames in a raw baseband file.

    Note: `buf` must be a writable buffer (e.g. a bytearray), since
    readinto() fills it in place.
    """
    with io.FileIO(file_name, "rb") as raw_file:
        while raw_file.readinto(buf):
            yield buf
        # Once the whole file has been consumed, drop it from the page cache.
        size = os.path.getsize(file_name)
        os.posix_fadvise(raw_file.fileno(), 0, size, os.POSIX_FADV_DONTNEED)
async def get(self):
    print("PID {} :getting file".format(os.getpid()))
    abs_path = os.path.abspath(self.get_argument('path'))
    if not os.access(abs_path, os.R_OK):
        raise web.HTTPError(status_code=404,
                            reason="File Not Found or File Access Denied")
    file_size = os.path.getsize(abs_path)
    content_type, _ = mimetypes.guess_type(abs_path)
    if not content_type:
        self.set_header('Content-Type', "application/octet-stream")
    else:
        self.set_header('Content-Type', content_type)
    self.add_header(
        'Content-Disposition',
        "attachment; filename={}".format(os.path.basename(abs_path)))
    self.add_header('Content-Length', file_size)
    chunk_size = 1024 * 1024 * 2
    async with aiofiles.open(
            abs_path, "rb", buffering=0,
            loop=ioloop.IOLoop.current(),
            executor=self.application.thread_executor) as fp:
        # No buffering in our address space; inform the kernel to buffer
        # aggressively for the given file.
        if os.name == "posix":
            os.posix_fadvise(fp.fileno(), 0, file_size,
                             os.POSIX_FADV_WILLNEED)
        while True:
            chunk = await fp.read(chunk_size)
            if not chunk:
                break
            try:
                self.write(chunk)
                await self.flush()
            except iostream.StreamClosedError:
                break
            finally:
                del chunk
                # Used for metering/limiting request bandwidth, or for
                # forced context switching on fast networks.
                await asyncio.sleep(0.000000001)
    print("PID {} : sent file {}".format(os.getpid(),
                                         os.path.basename(abs_path)))
def sync(self):
    """
    Synchronize file contents. Everything written prior to sync() must
    become durable before anything written after sync().
    """
    self.fd.flush()
    fdatasync(self.fileno)
    if hasattr(os, 'posix_fadvise'):
        os.posix_fadvise(self.fileno, 0, 0, os.POSIX_FADV_DONTNEED)
def write(self, block, data):
    if not self._writer:
        self._writer = open(self.io_name, 'rb+')
    offset = block.id * self._block_size
    self._writer.seek(offset)
    written = self._writer.write(data)
    os.posix_fadvise(self._writer.fileno(), offset, len(data),
                     os.POSIX_FADV_DONTNEED)
    assert written == len(data)
def try_advise(file, offset, length):
    """Try to advise the OS on what file data is needed next."""
    try:
        if hasattr(file, "fileno"):
            posix_fadvise(file.fileno(), offset, length, POSIX_FADV_WILLNEED)
    except Exception as ex:
        print(ex, file=sys.stderr, flush=True)
def nbd_client(self, version_uid):
    self.subprocess_run(
        args=['sudo', 'nbd-client', '127.0.0.1', '-p',
              str(self.SERVER_PORT), '-l'],
        success_regexp='^Negotiation: ..\n{}\n$'.format(
            version_uid[0].v_string))
    version_uid, size = version_uid
    self.subprocess_run(
        args=[
            'sudo', 'nbd-client', '-N', version_uid.v_string, '127.0.0.1',
            '-p', str(self.SERVER_PORT), self.NBD_DEVICE
        ],
        success_regexp=r'^Negotiation: ..size = \d+MB\nbs=1024, sz=\d+ bytes\n$'
                       r'|^Negotiation: ..size = \d+MB|Connected /dev/nbd\d+$')
    count = 0
    nbd_data = bytearray()
    with open(self.NBD_DEVICE, 'rb') as f:
        while True:
            data = f.read(64 * 1024 + random.randint(0, 8192))
            if not data:
                break
            count += len(data)
            nbd_data += data
    self.assertEqual(size, count)
    image_data = self.read_file(self.testpath.path + '/image')
    logger.info('image_data size {}, nbd_data size {}'.format(
        len(image_data), len(nbd_data)))
    self.assertEqual(image_data, bytes(nbd_data))
    f = os.open(self.NBD_DEVICE, os.O_RDWR)
    for offset in range(0, size, 4096):
        os.lseek(f, offset, os.SEEK_SET)
        data = self.random_bytes(4096)
        written = os.write(f, data)
        os.fsync(f)
        self.assertEqual(len(data), written)
        # Discard the cache so that the read request below really goes
        # to the NBD server.
        os.posix_fadvise(f, offset, len(data), os.POSIX_FADV_DONTNEED)
        os.lseek(f, offset, os.SEEK_SET)
        read_data = os.read(f, 4096)
        self.assertEqual(data, read_data)
    os.close(f)
    self.subprocess_run(args=['sudo', 'nbd-client', '-d', self.NBD_DEVICE],
                        success_regexp='^disconnect, sock, done\n$')
    # Signal the NBD server to stop.
    self.nbd_server.stop()
def _open(self):
    wrapper = super()._open()
    try:
        fd = wrapper.fileno()
        os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
    except Exception:
        # In case the file descriptor cannot be retrieved or fadvise is
        # not available, simply return the wrapper from FileHandler's
        # open method: the advice to the kernel is just advice, and if we
        # cannot give it, we move on.
        pass
    return wrapper
def close_segment(self):
    if self._write_fd:
        self.segment += 1
        self.offset = 0
        self._write_fd.flush()
        os.fsync(self._write_fd.fileno())
        if hasattr(os, 'posix_fadvise'):  # only on UNIX
            # Tell the OS that it does not need to cache what we just
            # wrote; this avoids spoiling the cache for the OS and for
            # other processes.
            os.posix_fadvise(self._write_fd.fileno(), 0, 0,
                             os.POSIX_FADV_DONTNEED)
        self._write_fd.close()
        self._write_fd = None
def _set_readahead_hints(self, roi, fileset):
    if not hasattr(os, 'posix_fadvise'):
        return
    # Note: POSIX_FADV_* advice values are plain enum constants, not bit
    # flags (e.g. SEQUENTIAL | WILLNEED == WILLNEED), so each piece of
    # advice needs its own fadvise call.
    if roi is None:
        for f in fileset:
            os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL)
            os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_WILLNEED)
    else:
        for f in fileset:
            os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_RANDOM)
            os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_WILLNEED)
def update(path, value, timestamp=None):
    """
    update(path, value, timestamp=None)

    path is a string
    value is a float
    timestamp is either an int or float
    """
    value = float(value)
    with open(path, 'r+b', BUFFERING) as fh:
        if CAN_FADVISE and FADVISE_RANDOM:
            posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
        return file_update(fh, value, timestamp)
def _set_readahead_hints(self, roi, fileset):
    if not hasattr(os, 'posix_fadvise'):
        return
    if any(f.fileno() is None for f in fileset):
        return
    for f in fileset:
        os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_WILLNEED)
def __getstate__(self):
    prefix = id(self)
    for idx, (filename, (offset, size)) in enumerate(self.file_chunks.items()):
        try:
            # os.open() takes flag bits, not a mode string.
            fd = os.open(filename, os.O_RDONLY)
            posix_fadvise(fd, offset, size, POSIX_FADV_SEQUENTIAL)
            os.close(fd)
        except Exception:
            pass
        _, attacher = file_attachment(filename, offset, size, False)
        key = struct.pack('NN', prefix, idx)
        attach(key, attacher)
    return {'prefix': prefix, 'N': idx + 1}
def safe_fadvise(fd, offset, len, advice):
    if hasattr(os, 'posix_fadvise'):
        advice = getattr(os, 'POSIX_FADV_' + advice)
        try:
            os.posix_fadvise(fd, offset, len, advice)
        except OSError:
            # Usually posix_fadvise can't fail for us, but there seem to
            # be failures when running borg under docker on ARM, likely
            # due to a bug outside of borg.
            # There is also a Python wrapper bug that always gives
            # errno = 0, see:
            # https://github.com/borgbackup/borg/issues/2095
            # As this call is not critical for correct function (it only
            # optimizes cache usage), we ignore these errors.
            pass
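# A hedged usage sketch for safe_fadvise above: the advice is passed by
# name and resolved to the matching os.POSIX_FADV_* constant, so a caller
# that has just streamed a file can drop it from the page cache like this
# (the path is illustrative):
import os

fd = os.open('/tmp/segment', os.O_RDONLY)  # hypothetical file
safe_fadvise(fd, 0, 0, 'DONTNEED')  # length 0 means "to end of file"
os.close(fd)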
def update_many(path, points):
    """update_many(path, points)

    path is a string
    points is a list of (timestamp, value) points
    """
    if not points:
        return
    points = [(int(t), float(v)) for (t, v) in points]
    # Order points by timestamp, newest first.
    points.sort(key=lambda p: p[0], reverse=True)
    with open(path, 'r+b', BUFFERING) as fh:
        if CAN_FADVISE and FADVISE_RANDOM:
            posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
        return file_update_many(fh, points)
def ncc(self):
    '''Low-level. Behaves like the "no-cache-cat" (ncc) binary, but in
    pure Python; returns the raw status output as a string.

    Only works on Unix, possibly only Linux. If this does not work
    correctly it fails silently; self-testing is essential to ensure
    that non-caching reads are executed successfully. If they are not,
    falling back to custom compiled C binaries would be necessary to
    get readouts.'''
    if not self.ready:
        raise OpenPCRError("Device not ready, cannot read status.")
    filen = os.path.join(self.devicepath, 'STATUS.TXT')
    with open(filen, "rb") as InF:
        os.posix_fadvise(InF.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
        fc = InF.read()
    # Return everything up to the first null character. The odd
    # null/whitespace pattern is incompatible with unicode mode.
    return fc.split(b"\0", 1)[0].decode()
def ncc(self): """Low-level. Calls ncc binary for appropriate platform, returns raw output as string. Behaves like "no-cache-cat" (ncc) but in pure-python. Only works on Unix, possibly Linux. If this does not work correctly it is a silent failure; self-testing is essential to ensure that non-caching reads are executed successfully. If not, fallback to custom compiled C binaries would be necessary to get readouts.""" if not self.ready: raise OpenPCRError("Device not ready, cannot read status.") filen = os.path.join(self.devicepath, "STATUS.TXT") with open(filen, "rb") as InF: os.posix_fadvise(InF.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) fc = InF.read() # Return until first null character. # Odd null/whitespace pattern is incompatible with unicode mode. return fc.split(b"\0", 1)[0].decode()
def hash_file(fn: Callable[[Union[bytes, memoryview]], None],
              fd: int, size: int, offset: int) -> int:
    """Repeatedly call a function on a slice of a file."""
    buffsize = _BUFFSIZE
    done = 0
    os.posix_fadvise(fd, offset, size, os.POSIX_FADV_SEQUENTIAL)
    if hasattr(os, "preadv"):  # pragma: py-lt-37
        preadv = cast(
            Callable[[int, List[bytearray], int], int],
            getattr(os, "preadv"),  # noqa: B009
        )
        buff = bytearray(buffsize)
        bufflist = [buff]
        view = memoryview(buff)
        while size > 0:
            n = preadv(fd, bufflist, offset)
            n = min(n, size)
            if n < buffsize:
                fn(view[:n])
            else:
                fn(view)
            done += n
            size -= n
            offset += n
    else:  # Python <= 3.6
        while size > 0:
            data = os.pread(fd, buffsize, offset)
            datasize = len(data)
            n = min(datasize, size)
            if n < datasize:
                fn(data[:n])
            else:
                fn(data)
            done += n
            size -= n
            offset += n
    return done
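# A hedged usage sketch for hash_file above (the path is illustrative):
# each slice is fed straight into a hashlib object via its update method,
# so the whole file is hashed without accumulating it in memory.
import hashlib
import os

path = '/tmp/blob'  # hypothetical input file
h = hashlib.sha256()
fd = os.open(path, os.O_RDONLY)
try:
    nbytes = hash_file(h.update, fd, os.path.getsize(path), 0)
finally:
    os.close(fd)
print(nbytes, h.hexdigest())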
def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
    offset = block.id * self._block_size
    t1 = time.time()
    with open(self._path, 'rb+') as f:
        f.seek(offset)
        written = f.write(data)
        os.posix_fadvise(f.fileno(), offset, len(data),
                         os.POSIX_FADV_DONTNEED)
    t2 = time.time()
    logger.debug('{} wrote block {} in {:.2f}s'.format(
        threading.current_thread().name,
        block.id,
        t2 - t1,
    ))
    assert written == len(data)
    return block
def onPortDataChanged(self, port):
    """
    Called when new data arrives at a port.

    :param port: the port where the new data is available.
    :return:
    """
    if self._currentFile is None:
        # Recording is not active -> do nothing.
        return
    s = self._currentFile["streams"][port.name()]
    sample = port.getData()
    # Perform timestamp calculations.
    if s.shape[0] > 0:
        lastDataTimestamp = self._lastDataTimestamp
        lastRcvTimestamp = self._lastRcvTimestamp
    else:
        lastDataTimestamp = sample.getTimestamp()
        lastRcvTimestamp = 0
    if self._useRcvTimestamps:
        rcvTimestamp = np.int64(time.perf_counter_ns() - self._basetime) / 1000
    else:
        rcvTimestamp = max(1, sample.getTimestamp() - lastDataTimestamp)
    self._lastDataTimestamp = np.int64(sample.getTimestamp())
    self._lastRcvTimestamp = rcvTimestamp
    # Append the new data to the existing HDF5 dataset.
    s.resize((s.shape[0] + 1,))
    s[-1:] = (np.frombuffer(sample.getContent(), dtype=np.uint8),
              sample.getDatatype(),
              np.int64(sample.getTimestamp()),
              rcvTimestamp)
    self._currentFile.flush()
    # Status update once each second.
    if (rcvTimestamp // 1000000) != (lastRcvTimestamp // 1000000):
        if hasattr(os, "posix_fadvise") and self.propertyCollection(
                ).getProperty("use_posix_fadvise_if_available"):
            os.posix_fadvise(self._currentFile.id.get_vfd_handle(), 0,
                             self._currentFile.id.get_filesize(),
                             os.POSIX_FADV_DONTNEED)
        self.statusUpdate.emit(self._name, rcvTimestamp * 1e-6,
                               self._currentFile.id.get_filesize())
def _sha1file(self, node):
    """Checksum a single node (file)."""
    if DEBUG:
        print("_sha1file({})".format(node))
    try:
        starttime = time.time()
        with open(node['Path'], 'rb') as f:
            # Caching:
            # Reading here risks filling the page cache with this data,
            # displacing potentially higher-value items.
            #
            # Trials of the FADV options were run to work this out. The
            # lowest-cache combination was:
            # * on opening the file: POSIX_FADV_NOREUSE
            # * before closing the file: POSIX_FADV_DONTNEED
            #
            # The differences seem small, but this combination was
            # nonetheless consistently the lowest-cache option in 3 tests.
            #
            # See:
            #   man 2 posix_fadvise
            #   https://stackoverflow.com/questions/15266115/read-file-without-disk-caching-in-linux
            os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_NOREUSE)
            sha = hashlib.sha1()
            # Start with a non-empty "last read" so the loop starts.
            data = ' '
            blockcount = 0
            while data:
                data = f.read(self.block_size)
                sha.update(data)
                blockcount += 1
                # Check on progress.
                if blockcount >= self.block_burst:
                    now = time.time()
                    delay = self.burst_time - (now - starttime)
                    if delay > 0.0:
                        time.sleep(delay)
                        starttime += self.burst_time
                    else:
                        # We're slipping - keep slipping.
                        starttime = now
                    blockcount = 0
            os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
        return sha.hexdigest()
    except FileNotFoundError:
        return None
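# A minimal standalone sketch of the low-cache read pattern described in
# the comments of _sha1file above (the path is illustrative): NOREUSE on
# open, DONTNEED before close.
import os

with open('/tmp/sample.bin', 'rb') as f:
    os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_NOREUSE)
    payload = f.read()
    os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)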
def _read(self, block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
    offset = block.id * self._block_size
    t1 = time.time()
    with open(self._path, 'rb') as f:
        f.seek(offset)
        data = f.read(block.size)
        os.posix_fadvise(f.fileno(), offset, block.size,
                         os.POSIX_FADV_DONTNEED)
    t2 = time.time()
    if not data:
        raise EOFError(
            'End of file reached on {} when there should be data.'.format(
                self.url))
    logger.debug('{} read block {} in {:.2f}s'.format(
        threading.current_thread().name,
        block.id,
        t2 - t1,
    ))
    return block, data
def hexdigest_file(path: PathLike, algorithm: str) -> str:
    """Return the hexdigest of the file at `path` using `algorithm`.

    Streams the contents of the file to the hash `algorithm` and returns
    the hexdigest. Raises a `ValueError` if the specified `algorithm` is
    not supported.
    """
    hasher = hashlib.new(algorithm)
    with open(path, "rb") as f:
        os.posix_fadvise(f.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL)
        while True:
            data = f.read(BLOCKSIZE)
            if not data:
                break
            hasher.update(data)
    return hasher.hexdigest()
def __init__(self, pathname, writeable=False):
    '''
    Constructor.

    @param pathname:str   The pathname of the file to use.
    @param writeable:bool Should the file be open for writing too?
    '''
    INTSIZE = 20
    self.pathname = pathname
    self.fd = os.open(pathname, os.O_RDWR if writeable else os.O_RDONLY)
    try:
        os.posix_fadvise(self.fd, 0, 0, os.POSIX_FADV_RANDOM)
    except Exception:
        pass
    self.width = int(self.__read(INTSIZE, INTSIZE * 0).decode('utf-8', 'strict'))
    self.items = int(self.__read(INTSIZE, INTSIZE * 1).decode('utf-8', 'strict'))
    self.size = int(self.__read(INTSIZE, INTSIZE * 2).decode('utf-8', 'strict'))
    self.removed = int(self.__read(INTSIZE, INTSIZE * 3).decode('utf-8', 'strict'))
    self.offset = INTSIZE * 4
    self.xwidth = self.width + INTSIZE * 2
def __init__(self, file, prune, num_docs, vocab_size, in_memory, gpu):
    self.file = file = open(file, 'rb')
    mmp = mmap.mmap(file.fileno(), 0, flags=mmap.MAP_PRIVATE,
                    prot=mmap.PROT_READ)
    if in_memory:
        # The file will be read in full, sequentially.
        os.posix_fadvise(file.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL)
    else:
        # The file will be read randomly, as needed.
        os.posix_fadvise(file.fileno(), 0, 0, os.POSIX_FADV_RANDOM)
    if prune != 0:
        S_DTYPE = 2
        if in_memory:
            mmp = (np.empty((num_docs, prune), dtype='i2'),
                   np.empty((num_docs, prune), dtype='f2'))
            for did in logger.pbar(range(num_docs), desc='loading dvecs'):
                try:
                    mmp[0][did] = np.frombuffer(file.read(prune * S_DTYPE),
                                                dtype='i2')
                    mmp[1][did] = np.frombuffer(file.read(prune * S_DTYPE),
                                                dtype='f2')
                except ValueError:
                    pass
            file.close()
        self.lookup = self.dvec_lookup_pruned
    else:
        if in_memory:
            mmp = file.read()
            file.close()
        self.lookup = self.dvec_lookup_unpruned
    self.prune = prune
    self.num_docs = num_docs
    self.vocab_size = vocab_size
    self.mmp = mmp
    self.gpu = gpu
    self.in_memory = in_memory
def __getitem__(self, index):
    # if not isinstance(index, int):
    index = np.sort(index)
    if self.reopen_mem_map:
        self.event_data = np.memmap(self.path, mode="r", shape=self.shape,
                                    offset=self.offset, dtype=self.dtype)
    self.e = np.array(self.event_data[index, :, :, :19])
    # self.event_data.read_direct(self.e, source_sel=np.s_[index, :, :, :19],
    #                             dest_sel=np.s_[:])
    if self.fadvise == 'file':
        os.posix_fadvise(self.fd.fileno(), 0, self.f.id.get_filesize(),
                         os.POSIX_FADV_DONTNEED)
    elif self.fadvise == 'dataset':
        os.posix_fadvise(self.fd.fileno(),
                         self.hdf5_event_data.id.get_offset(),
                         self.hdf5_event_data.id.get_storage_size(),
                         os.POSIX_FADV_DONTNEED)
    return self.e
def _read(self, block):
    with open(self.io_name, 'rb') as source_file:
        offset = block.id * self._block_size
        t1 = time.time()
        source_file.seek(offset)
        data = source_file.read(block.size)
        t2 = time.time()
        # Throw away the cache.
        os.posix_fadvise(source_file.fileno(), offset, block.size,
                         os.POSIX_FADV_DONTNEED)
    if not data:
        raise EOFError('EOF reached on source when there should be data.')
    data_checksum = data_hexdigest(self._hash_function, data)
    logger.debug('{} read block {} (checksum {}...) in {:.2f}s'.format(
        threading.current_thread().name,
        block.id,
        data_checksum[:16],
        t2 - t1,
    ))
    return block, data, data_checksum
def random(self, offset, size):
    posix_fadvise(self.fd, offset, size, POSIX_FADV_RANDOM)
def normal(self, offset, size):
    posix_fadvise(self.fd, offset, size, POSIX_FADV_NORMAL)
def __call__(self, offset, size, willneed):
    if willneed:
        posix_fadvise(self.fd, offset, size, POSIX_FADV_WILLNEED)
    else:
        posix_fadvise(self.fd, offset, size, POSIX_FADV_DONTNEED)
#!/usr/bin/env python3
import os
import mmap
import sys

if sys.version_info[:2] < (3, 3):
    # Native posix_fadvise was introduced in Python 3.3; shim it in with
    # ctypes on older versions:
    import ctypes
    libc = ctypes.CDLL("libc.so.6")
    os.posix_fadvise = libc.posix_fadvise
    os.POSIX_FADV_NORMAL = 0
    os.POSIX_FADV_RANDOM = 1
    os.POSIX_FADV_SEQUENTIAL = 2
    os.POSIX_FADV_WILLNEED = 3
    os.POSIX_FADV_DONTNEED = 4
    os.POSIX_FADV_NOREUSE = 5

if __name__ == "__main__":
    with open(sys.argv[1], "rb") as InF:
        os.posix_fadvise(InF.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
        print(InF.read().split(b"\0", 1)[0].decode())
# }}}

import argparse
import os
import sys
import time

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("target")
    args = parser.parse_args()
    fd = os.open(args.target, os.O_WRONLY)
    # POSIX_FADV_NOREUSE is a no-op
    # os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_NOREUSE)
    os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
    filesize = os.lseek(fd, 0, os.SEEK_END)
    os.lseek(fd, 0, os.SEEK_SET)
    bufsize = 512 * 4 * 1024
    buf = bytearray([0] * bufsize)
    allwritten = 0
    remainingdata = filesize
    filesize_mib = int(filesize / 2**20)
    start = time.time()
    laststatus = start
    buffered = 0
    try:
        while remainingdata != 0:
            remainingdata = filesize - allwritten
def fadvise_sequential(descriptor):
    """Try to advise the kernel to read from 'descriptor' sequentially."""
    try:
        posix_fadvise(descriptor.fileno(), 0, 0, POSIX_FADV_SEQUENTIAL)
    except Exception:
        pass
def create(path, archiveList, xFilesFactor=None, aggregationMethod=None,
           sparse=False, useFallocate=False):
    """create(path, archiveList, xFilesFactor=0.5, aggregationMethod='average')

    path is a string
    archiveList is a list of archives, each of which is of the form
    (secondsPerPoint, numberOfPoints)
    xFilesFactor specifies the fraction of data points in a propagation
    interval that must have known values for a propagation to occur
    aggregationMethod specifies the function to use when propagating data
    (see ``whisper.aggregationMethods``)
    """
    # Set default params.
    if xFilesFactor is None:
        xFilesFactor = 0.5
    if aggregationMethod is None:
        aggregationMethod = 'average'
    # Validate archive configurations...
    validateArchiveList(archiveList)
    # Looks good, now we create the file and write the header.
    if os.path.exists(path):
        raise InvalidConfiguration("File %s already exists!" % path)
    with open(path, 'wb', BUFFERING) as fh:
        try:
            if LOCK:
                fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
            if CAN_FADVISE and FADVISE_RANDOM:
                posix_fadvise(fh.fileno(), 0, 0, POSIX_FADV_RANDOM)
            oldest = max(secondsPerPoint * points
                         for secondsPerPoint, points in archiveList)
            __writeHeaderMetadata(fh, aggregationMethod, oldest,
                                  xFilesFactor, len(archiveList))
            headerSize = metadataSize + (archiveInfoSize * len(archiveList))
            archiveOffsetPointer = headerSize
            for secondsPerPoint, points in archiveList:
                archiveInfo = struct.pack(archiveInfoFormat,
                                          archiveOffsetPointer,
                                          secondsPerPoint, points)
                fh.write(archiveInfo)
                archiveOffsetPointer += (points * pointSize)
            # If configured to use fallocate and capable of it, use that;
            # otherwise attempt sparse if configured, or zero pre-allocate
            # if sparse isn't configured.
            if CAN_FALLOCATE and useFallocate:
                remaining = archiveOffsetPointer - headerSize
                fallocate(fh, headerSize, remaining)
            elif sparse:
                fh.seek(archiveOffsetPointer - 1)
                fh.write(b'\x00')
            else:
                remaining = archiveOffsetPointer - headerSize
                chunksize = 16384
                zeroes = b'\x00' * chunksize
                while remaining > chunksize:
                    fh.write(zeroes)
                    remaining -= chunksize
                fh.write(zeroes[:remaining])
            if AUTOFLUSH:
                fh.flush()
                os.fsync(fh.fileno())
            # Explicitly close the file to catch IOError on close().
            fh.close()
        except IOError:
            # If we got an IOError above, the file is either empty or half
            # created. Better off deleting it to avoid surprises later.
            os.unlink(fh.name)
            raise
def close_fd(self, fd):
    if hasattr(os, 'posix_fadvise'):  # only on UNIX
        os.posix_fadvise(fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
    fd.close()
def sequential(self, offset, size):
    posix_fadvise(self.fd, offset, size, POSIX_FADV_SEQUENTIAL)
def noreuse(self, offset, size):
    posix_fadvise(self.fd, offset, size, POSIX_FADV_NOREUSE)
import hashlib
import os
import subprocess
import sys
import time
from datetime import timedelta
from math import ceil

SAME = b"0"
DIFF = b"1"
COMPLEN = len(SAME)  # SAME/DIFF length

LOCAL_FADVISE = 1
REMOTE_FADVISE = 2

if callable(getattr(os, "posix_fadvise", False)):
    from os import posix_fadvise, POSIX_FADV_NOREUSE, POSIX_FADV_DONTNEED
    fadvise = lambda fileobj, offset, length, advice: \
        posix_fadvise(fileobj.fileno(), offset, length, advice)
else:
    try:
        from fadvise import set_advice, POSIX_FADV_NOREUSE, POSIX_FADV_DONTNEED
        fadvise = lambda fileobj, offset, length, advice: \
            set_advice(fileobj, advice, offset, length)
    except ImportError:
        fadvise = None

if fadvise:
    USE_DONTNEED = sys.platform.startswith('linux')
    USE_NOREUSE = not USE_DONTNEED
else:
    USE_NOREUSE = USE_DONTNEED = False


def do_create(f, size):
    f = open(f, 'a', 0)
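# A hedged usage sketch for the fadvise fallback above: both backends are
# normalized to the same (fileobj, offset, length, advice) signature, so
# call sites stay backend-agnostic (the path is illustrative):
if fadvise and USE_DONTNEED:
    with open('/tmp/block.img', 'rb') as f:
        data = f.read(1 << 20)
        # Drop the pages we just read from the page cache.
        fadvise(f, 0, len(data), POSIX_FADV_DONTNEED)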
def uncache(path):
    fd = os.open(path, os.O_RDWR)
    # Flush dirty pages first: DONTNEED only drops clean pages.
    os.fdatasync(fd)
    os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
    os.close(fd)
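# Illustrative call for uncache above, e.g. after producing a large
# scratch file whose pages should not linger in the page cache
# (the path is hypothetical):
uncache('/tmp/scratch.bin')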
def os_access_hint(file_obj):
    # POSIX_FADV_* advice values are plain enum constants, not bit flags
    # (SEQUENTIAL | WILLNEED == WILLNEED), so give each piece of advice
    # in its own call.
    posix_fadvise(file_obj.fileno(), 0, 0, POSIX_FADV_SEQUENTIAL)
    posix_fadvise(file_obj.fileno(), 0, 0, POSIX_FADV_WILLNEED)