Example #1
    def _fastcopy_sendfile(self, src: BinaryIO, dst: BinaryIO):
        """Based on :func:`shutil._fastcopy_sendfile`"""
        try:
            in_fd = src.fileno()
            out_fd = dst.fileno()
        except Exception as e:
            raise _GiveupOnFastCopy(e)  # not a regular file

        buf_size = self.sendfile_buf_size
        log.debug(
            f'\nUsing _fastcopy_sendfile with {buf_size=:,d} b [loop of os.sendfile]'
        )
        finished = self.finished.is_set
        try:
            while (sent := os.sendfile(out_fd, in_fd, self.copied,
                                       buf_size)) and not finished():
                self.copied += sent
        except OSError as e:
            e.filename = src.name  # provide more information in the error
            e.filename2 = dst.name
            if e.errno == errno.ENOTSOCK:
                global _USE_CP_SENDFILE
                # sendfile() on this platform (probably Linux < 2.6.33) does not support copies between regular
                # files (only sockets).
                _USE_CP_SENDFILE = False
                raise _GiveupOnFastCopy(e)
            elif e.errno == errno.ENOSPC:  # filesystem is full
                raise e from None
            elif self.copied == 0 and os.lseek(out_fd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(e)
            raise e
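
For reference, a minimal standalone sketch of the same os.sendfile() loop, with the class state (self.copied, self.sendfile_buf_size, self.finished) replaced by local variables; the fallback and error handling above are omitted:

import os

def sendfile_copy(src_fd: int, dst_fd: int, buf_size: int = 2 ** 20) -> int:
    """Copy src_fd into dst_fd with os.sendfile(); returns the bytes copied."""
    offset = 0
    # os.sendfile() returns 0 once the source is exhausted, which ends the loop.
    while sent := os.sendfile(dst_fd, src_fd, offset, buf_size):
        offset += sent
    return offset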
Example #2
def blake2b(fd: BinaryIO) -> bytes:
    """Calculate the Blake2b digest of a file.

    :param fd: File-like object of which to calculate the digest

    :returns: Blake2b digest of the file contents
    """
    hasher = hashlib.blake2b(digest_size=16)

    size = os.fstat(fd.fileno()).st_size
    checksum.hash_file(hasher.update, fd.fileno(), size, 0)

    return hasher.digest()
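
checksum.hash_file is a project-specific helper; a sketch that relies only on the standard library (an assumption, not the original helper) would feed the file to the hasher in chunks:

import hashlib
from typing import BinaryIO

def blake2b_chunked(fd: BinaryIO, chunk_size: int = 1 << 20) -> bytes:
    """Blake2b digest of a file, reading it in chunks with plain hashlib."""
    hasher = hashlib.blake2b(digest_size=16)
    for chunk in iter(lambda: fd.read(chunk_size), b''):
        hasher.update(chunk)
    return hasher.digest()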
Example #3
 def __init__(self, fileObject: BinaryIO, copyMode: bool = False) -> None:
     self.fileObject = fileObject
     self.file = mmap.mmap(
         fileObject.fileno(),
         0,  # a length of 0 maps the whole file
         access=mmap.ACCESS_COPY if copyMode else mmap.ACCESS_READ)
Example #4
def fromfile(
    file: BinaryIO,
    *,
    dtype: torch.dtype,
    byte_order: str,
    count: int = -1,
) -> torch.Tensor:
    """Construct a tensor from a binary file.
    .. note::
        This function is similar to :func:`numpy.fromfile` with two notable differences:
        1. This function only accepts an open binary file, but not a path to it.
        2. This function has an additional ``byte_order`` parameter, since PyTorch's ``dtype``s do not support that
            concept.
    .. note::
        If the ``file`` was opened in update mode, i.e. "r+b" or "w+b", reading data is much faster. Be aware that as
        long as the file is still open, in-place operations on the returned tensor will be reflected back to the file.
    Args:
        file (IO): Open binary file.
        dtype (torch.dtype): Data type of the underlying data as well as of the returned tensor.
        byte_order (str): Byte order of the data. Can be "little" or "big" endian.
        count (int): Number of values of the returned tensor. If ``-1`` (default), will read the complete file.
    """
    byte_order = "<" if byte_order == "little" else ">"
    char = "f" if dtype.is_floating_point else (
        "i" if dtype.is_signed else "u")
    item_size = (torch.finfo
                 if dtype.is_floating_point else torch.iinfo)(dtype).bits // 8
    np_dtype = byte_order + char + str(item_size)

    buffer: Union[memoryview, bytearray]
    if platform.system() != "Windows":
        # PyTorch does not support tensors with underlying read-only memory. In case
        # - the file has a .fileno(),
        # - the file was opened for updating, i.e. 'r+b' or 'w+b',
        # - the file is seekable
        # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it
        # to a mutable location afterwards.
        try:
            buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell():]
            # Reading from the memoryview does not advance the file cursor, so we have to do it manually.
            file.seek(*(0, io.SEEK_END) if count == -1 else (count * item_size,
                                                             io.SEEK_CUR))
        except (AttributeError, PermissionError, io.UnsupportedOperation):
            buffer = _read_mutable_buffer_fallback(file, count, item_size)
    else:
        # On Windows just trying to call mmap.mmap() on a file that does not support it, may corrupt the internal state
        # so no data can be read afterwards. Thus, we simply ignore the possible speed-up.
        buffer = _read_mutable_buffer_fallback(file, count, item_size)

    # We cannot use torch.frombuffer() directly, since it only supports the native byte order of the system. Thus, we
    # read the data with np.frombuffer() with the correct byte order and convert it to the native one with the
    # successive .astype() call.
    return torch.from_numpy(
        np.frombuffer(buffer, dtype=np_dtype, count=count).astype(np_dtype[1:],
                                                                  copy=False))
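
A possible usage sketch (the file name and its contents are assumptions for illustration; with plain "rb" the function simply falls back to reading and copying the data):

import torch

with open("data.bin", "rb") as f:  # hypothetical file of raw big-endian float32 values
    tensor = fromfile(f, dtype=torch.float32, byte_order="big")
print(tensor.shape, tensor.dtype)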
Example #5
def write_to_file(file_fd: BinaryIO,
                  dir_fileno: Optional[int],
                  data: bytes,
                  fsync: bool = True):
    length_to_write = len(data)
    written = 0

    while written < length_to_write:
        # accumulate with += so `written` tracks the total number of bytes written so far
        written += file_fd.write(data[written:])
    if fsync:
        fsync_file_and_dir(file_fd.fileno(), dir_fileno)
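
fsync_file_and_dir is not shown above; a plausible implementation (an assumption, not the original helper) flushes the file and, when a directory descriptor is given, the directory entry as well:

import os
from typing import Optional

def fsync_file_and_dir(file_fileno: int, dir_fileno: Optional[int]) -> None:
    """Force the file contents, and optionally its directory entry, to disk."""
    os.fsync(file_fileno)
    if dir_fileno is not None:
        os.fsync(dir_fileno)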
Example #6
def get_file_hash(fileobj: BinaryIO, algo_inst=None):
    """
    Compute the hash of a given file, or update ``algo_inst`` in place if provided.

    :param fileobj: file object whose contents are hashed
    :param algo_inst: instance of a hashlib object. If None, md5 from hashlib is used.
    :returns: new algo instance, or a reference to ``algo_inst`` if one was provided
    """
    if algo_inst is None:
        algo_inst = md5()
    # mmap the whole file read-only and hash it in a single pass (avoid shadowing the built-in map())
    mapped = mmap.mmap(fileobj.fileno(), 0, prot=mmap.PROT_READ)
    algo_inst.update(mapped)
    mapped.close()
    return algo_inst
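
A possible usage sketch (the file path and the sha256 instance are illustrative assumptions):

import hashlib

with open("archive.tar", "rb") as f:  # hypothetical non-empty file
    digest = get_file_hash(f, hashlib.sha256()).hexdigest()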
Example #7
    def encrypt_stream(self, f_in: BinaryIO, f_out: BinaryIO, buffer_size: int = 1024) -> None:
        if buffer_size % self.BLOCK_SIZE != 0:
            raise ValueError('Buffer size must be a multiple of default block size (64)!')

        pbar = tqdm.tqdm(desc='Зашифрование', total=os.stat(f_in.fileno()).st_size, dynamic_ncols=True, colour='green',
                         leave=True)
        while data := f_in.read(buffer_size):
            out_buffer = array.array('B')
            pbar.update(len(data))  # advance by the bytes actually read; the last chunk may be shorter
            for block in self.split_into_blocks(data):
                if len(block) < 8:
                    # pad the block with trailing zero bytes up to the 8-byte block size
                    block = block.ljust(8, b'\x00')
                out_buffer.extend(self.encrypt_bytes(block))
            out_buffer.tofile(f_out)
Example #8
    def _ensure_length(t: int, file: BinaryIO, file_mmap: mmap.mmap) -> mmap.mmap:
        if len(file_mmap) > t:
            return file_mmap
        size = len(file_mmap)
        add_size = (t - size) * 2 + int(size / 2)

        file_mmap.flush()
        file_mmap.close()

        file.seek(size)
        file.write(b'\0' * add_size)
        file.flush()

        ans = mmap.mmap(file.fileno(), 0)
        assert (len(ans) > t)

        return ans
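
A possible usage sketch, treating _ensure_length as a static helper (the file name and target size are illustrative assumptions):

import mmap

with open("store.bin", "r+b") as f:  # hypothetical, non-empty file opened for update
    m = mmap.mmap(f.fileno(), 0)
    m = _ensure_length(4096, f, m)   # guarantees len(m) > 4096, growing the file if needed
    m[4095:4096] = b"\x01"
    m.flush()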
Example #9
    def __init__(self,
                 file_obj: BinaryIO,
                 encoding: str = DEFAULT_ENCODING,
                 dtype: DType = DEFAULT_DTYPE,
                 out_dtype: Optional[DType] = None):
        super().__init__(out_dtype or dtype)

        self.dtype = numpy.dtype(dtype)
        encoding = _bom_free_version(encoding)
        self.encoding = encoding

        self._file_obj = file_obj
        self._mmap = mmap.mmap(file_obj.fileno(), 0, access=mmap.ACCESS_READ)

        self.header = self._read_header()
        self._body_start = self._mmap.tell()  # store the position where the actual data starts

        self._vector_size_in_bytes = self.dtype.itemsize * self.header['vector_size']
Example #10
def write_message(logger: logging.Logger, fhandle: BinaryIO,
                  message: str) -> None:
    """Write message into file handle.

    Sets up a 5-second SIGALRM and raises `ValueError` if the alarm fires
    before the write completes, or if `fhandle` is not a FIFO pipe.
    """
    signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(5)
    try:
        fhandle_stat = os.fstat(fhandle.fileno())
        is_fifo = stat.S_ISFIFO(fhandle_stat.st_mode)
        if not is_fifo:
            raise ValueError('fhandle is expected to be a FIFO pipe')

        logger.debug('Will write %s', repr(message))
        fhandle.write(message.encode('utf-8'))
        signal.alarm(0)
    finally:
        # always cancel the pending alarm, even if the write raised
        signal.alarm(0)
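
The signal_handler installed above is not shown; a plausible handler (an assumption, not the original) raises ValueError so a write that blocks past the 5-second alarm is aborted:

import signal

def signal_handler(signum: int, frame) -> None:
    """Assumed SIGALRM handler: abort a write that has blocked for too long."""
    raise ValueError('Timed out while writing to the FIFO')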
Example #11
def is_real_file(the_input: BinaryIO) -> bool:
    """
    Determine if the file-like object is associated with an actual file.
    This is mainly to consider suitability for establishment of a numpy.memmap.

    Parameters
    ----------
    the_input : BinaryIO

    Returns
    -------
    bool
    """

    if not hasattr(the_input, 'fileno'):
        return False
    try:
        fileno = the_input.fileno()
        return isinstance(fileno, int) and (fileno >= 0)
    except Exception:
        return False
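
A possible usage sketch showing the intended memmap decision (the file name and dtype are illustrative assumptions):

import numpy

with open("samples.dat", "rb") as f:  # hypothetical raw data file
    if is_real_file(f):
        data = numpy.memmap(f, dtype=numpy.uint8, mode='r')
    else:
        data = numpy.frombuffer(f.read(), dtype=numpy.uint8)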
Example #12
def forward_stream(src_stream: BinaryIO, dst_stream: BinaryIO,
                   rank: str) -> None:
    for line in iter(src_stream.readline, b""):
        line = f"[rank={rank}] ".encode() + line
        os.write(dst_stream.fileno(), line)
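
A possible usage sketch forwarding a child process's output with a rank prefix (the command is an illustrative assumption):

import subprocess
import sys

proc = subprocess.Popen(["echo", "hello"], stdout=subprocess.PIPE)  # hypothetical command
forward_stream(proc.stdout, sys.stdout.buffer, rank="0")
proc.wait()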
Example #13
def lockerf(
    infile: typing.BinaryIO,
    outfile: typing.BinaryIO,
    password: bytes,
    encrypting: bool,
    *,
    kdf: typing.Callable[[bytes, bytes, int], bytes] = PBKDF2_HMAC,
    aes_mode: Modes = Modes.MODE_GCM,
    blocksize: int = 16 * 1024,
    metadata: bytes = METADATA,
    dklen: int = 32,
    backend: typing.Optional[_Backends] = None,
) -> None:
    """Utility tool for encrypting files.

    This function reads from ``infile`` in blocks, specified by ``blocksize``,
    encrypts or decrypts the data and writes to ``outfile``. By design of
    the cipher wrapper for R/W to files, no intermediate copy of data is
    made during operation.

    Operation details:

    1. Password derivation

       The ``password`` is first derived into a key with PBKDF2-HMAC using a
       32-byte salt, 150000 iterations and ``sha256`` as the hash algorithm.
       See ``kdf`` for more details on key derivation functions.

    2. Cipher creation

       The cipher is created with a 12-byte nonce if the mode is GCM, else a
       16-byte nonce. The nonce is stored as part of the ``Header`` for identifying
       the file, along with other important values.

       1. Authentication

          Before the operation begins, the authentication data is passed
          to the cipher. The authentication bits are::

              magic, mode, salt, metadata, nonce

          in that order.

    3. Finalization

       After the entire operation completes, the tag created by the
       authenticator of the cipher is written to the file as part of the
       ``Header``. If the file is being decrypted, the tag is instead read
       from the ``Header`` to verify file integrity and correct decryption.

    Note:
        If the cipher mode does not support authentication, HMAC is used.
        Refer to the documentation of
        :class:`pyflocker.ciphers.base.BaseAEADCipher`.

    Args:
        infile (filelike):
            The file or file-like object to read from.
        outfile (filelike):
            The file or file-like object to write into.
        password (bytes, bytearray, memoryview):
            The password to use for encrypting the files.
        encrypting (bool):
            Whether the ``infile`` is being encrypted: True if encrypting
            else False.

    Keyword Arguments:
        kdf (callable):
            The key derivation function to use for deriving keys.
            :func:`hashlib.pbkdf2_hmac` is used with 150000 iterations and
            ``sha256`` as the hash algorithm.

            If a custom ``kdf`` is used, the ``kdf`` must accept 3 arguments,
            ``password``, ``salt`` and ``dklen``. It is assumed that the
            other required values are already passed to it. You can use a
            partial function (:py:func:`functools.partial`) for that purpose.
        aes_mode (:class:`~pyflocker.ciphers.modes.Modes`):
            The AES mode to use for encryption/decryption.
            The mode can be any attribute from :any:`Modes` except those
            which are defined in :obj:`pyflocker.ciphers.modes.special`.
            Defaults to :any:`Modes.MODE_GCM`.

            Specifying this value while decrypting has no effect.
        blocksize (int):
            The amount of data to read from ``infile`` in each iteration.
            Defaults to 16384.
        metadata (bytes, bytearray, memoryview):
            The metadata to write to the file. It must be at most 32 bytes.
        dklen (int):
            The desired key length (in bytes) for passing to the cipher.
            It specifies the strength of AES cipher. Defaults to 32.
        backend (:any:`Backends`):
            The backend to use to instantiate the AES cipher from.
            If None is specified (the default), any available backend
            will be used.

    Returns:
        None

    Raises:
        DecryptionError: if password was invalid or the file was tampered
            with.
        NotImplementedError: if the mode is not supported.
        ValueError:
            If ``infile`` and ``outfile`` are same or key length is invalid.
        OverflowError: If the length of metadata exceeds 32 bytes.
        TypeError:
            If the header is invalid, or the metadata or magic number does
            not match.
    """
    if os.path.samefile(infile.fileno(), outfile.fileno()):
        raise ValueError("infile and outfile cannot be the same")

    # set defaults
    if aes_mode in special:
        raise NotImplementedError(f"{aes_mode} is not supported.")

    if len(metadata) > 32:
        raise OverflowError("maximum metadata length exceeded (limit: 32).")

    if not encrypting:
        header = _get_header(infile.read(HEADER_FORMAT.size), metadata)
    else:
        salt = os.urandom(32)
        nonce = os.urandom(12) if aes_mode == AES.MODE_GCM else os.urandom(16)
        header = _Header(MAGIC, aes_mode.value, nonce, b"", metadata, salt)
        outfile.write(HEADER_FORMAT.pack(*header))

    cipher = AES.new(
        encrypting,
        kdf(
            password=password,
            salt=header.salt,
            dklen=_check_key_length(dklen),
        ),
        Modes(header.mode),
        header.nonce,
        file=infile,
        backend=backend,
        tag_length=None,
    )
    cipher.authenticate(
        struct.pack(
            ">I H 32s 32s 16s",
            header.magic,  # same value as MAGIC, whether freshly built or read back from the header
            header.mode,
            header.salt,
            header.metadata,
            header.nonce,
        )
    )
    cipher.update_into(outfile, blocksize=blocksize, tag=header.tag)

    if encrypting:
        outfile.seek(struct.calcsize(">I H 16s"))
        outfile.write(cipher.calculate_tag())
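
A possible usage sketch encrypting and then decrypting a file with the defaults (the file names and password are illustrative assumptions):

password = b"correct horse battery staple"  # hypothetical password

with open("report.pdf", "rb") as fin, open("report.pdf.enc", "wb") as fout:
    lockerf(fin, fout, password, True)   # encrypt

with open("report.pdf.enc", "rb") as fin, open("report.dec.pdf", "wb") as fout:
    lockerf(fin, fout, password, False)  # decrypt and verify the tag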