Пример #1
0
 def open_file_python(self, path_or_file: PathOrFile, mode: ModeArg,
                      **kwargs) -> FileLike:
     """Open a file with the python-level ``BZ2File`` class of ``self.lib``.

     Args:
         path_or_file: A path or an open file object.
         mode: The file open mode, as a string or a mode object.
         kwargs: Additional arguments passed through to ``BZ2File``.

     Returns:
         The ``BZ2File`` object for non-text modes; for text modes, an
         ``io.TextIOWrapper`` around a ``BZ2File`` opened with only the
         access part of the mode.
     """
     file_mode = FileMode(mode) if isinstance(mode, str) else mode
     if not file_mode.text:
         return self.lib.BZ2File(path_or_file, file_mode.value, **kwargs)
     # Text mode: open the stream using the access flag alone, then layer
     # text decoding/encoding on top of it.
     binary_stream = self.lib.BZ2File(
         path_or_file, file_mode.access.value, **kwargs)
     return io.TextIOWrapper(binary_stream)
Пример #2
0
    def open_file(self,
                  path: str,
                  mode: ModeArg,
                  use_system: bool = True,
                  **kwargs) -> FileLike:
        """Opens a compressed file for reading or writing.

        If ``use_system`` is True and the system provides an accessible
        executable, then system-level compression is used. Otherwise defaults
        to using the python implementation.

        Reading requires the system *decompression* executable and writing
        requires the system *compression* executable; each direction checks
        and uses its own executable.

        Args:
            path: The path of the file to open.
            mode: The file open mode.
            use_system: Whether to attempt to use system-level compression.
            kwargs: Additional arguments to pass to the python-level open
                method, if system-level compression isn't used.

        Returns:
            A file-like object.
        """
        if isinstance(mode, str):
            mode = FileMode(mode)

        if use_system:
            # pylint: disable=redefined-variable-type
            gzfile = None  # type: FileLikeInterface
            if mode.readable and self.can_use_system_decompression:
                # Reading runs the 'd' (decompress) command, so it must be
                # gated on -- and labelled with -- the decompression program.
                gzfile = SystemReader(self.decompress_path, path,
                                      self.get_command('d', src=path),
                                      self.decompress_name)
            elif not mode.readable and self.can_use_system_compression:
                # The system process always exchanges bytes; text handling
                # (if requested) is layered on top below.
                bin_mode = FileMode(access=mode.access,
                                    coding=ModeCoding.BINARY)
                gzfile = SystemWriter(self.compress_path, path, bin_mode,
                                      self.get_command('c'),
                                      self.compress_name)
            if gzfile:
                if mode.text:
                    return io.TextIOWrapper(gzfile)
                else:
                    return gzfile

        # No usable system executable (or use_system is False).
        return self.open_file_python(path, mode, **kwargs)
Пример #3
0
 def open_file_python(self, path_or_file: PathOrFile, mode: ModeArg,
                      **kwargs) -> FileLike:
     """Open a file with ``self.lib.open`` and buffer binary streams.

     Args:
         path_or_file: A path or an open file object.
         mode: The file open mode, as a string or a mode object.
         kwargs: Additional arguments passed through to ``self.lib.open``.

     Returns:
         For binary modes, the opened file wrapped in ``io.BufferedReader``
         (readable modes) or ``io.BufferedWriter`` (otherwise); for
         non-binary modes, the object returned by ``self.lib.open`` as-is.
     """
     # pylint: disable=redefined-variable-type
     file_mode = FileMode(mode) if isinstance(mode, str) else mode
     raw = self.lib.open(path_or_file, file_mode.value, **kwargs)
     if not file_mode.binary:
         return raw
     buffered = io.BufferedReader if file_mode.readable else io.BufferedWriter
     return buffered(raw)
Пример #4
0
 def open_file_python(self, path_or_file: PathOrFile, mode: ModeArg,
                      **kwargs) -> FileLike:
     """Open a file with the python-level library (``self.lib``).

     Args:
         path_or_file: The file to open -- a path or open file object.
         mode: The file open mode, as a string or a mode object.
         kwargs: Additional arguments to pass to ``self.lib.open``.

     Returns:
         The file-like object returned by ``self.lib.open``.
     """
     file_mode = FileMode(mode) if isinstance(mode, str) else mode
     mode_string = file_mode.value
     return self.lib.open(path_or_file, mode_string, **kwargs)
Пример #5
0
    def peek(self, size: int = 1) -> AnyChar:
        """Look ahead in the stream without moving the current position.

        The underlying file's own ``peek()`` is used when it has one;
        otherwise the data is read and the position is restored with
        ``seek()``.

        Args:
            size: The max number of bytes/characters to return.

        Returns:
            At most `size` bytes/characters. Unlike io.BufferedReader.peek(),
            will never return more than `size` bytes/characters.

        Raises:
            IOError: If the file is not readable, or if it provides neither
                ``peek()`` nor ``seek()``.

        Notes:
            If the file uses multi-byte encoding and N characters are desired,
            it is up to the caller to request `size=2N`.
        """
        fileobj = self._fileobj
        if not FileMode(fileobj.mode).readable:
            raise IOError("Can only call peek() on a readable file")

        if hasattr(fileobj, "peek"):
            # Native peek() may hand back more than requested; trim it.
            data = fileobj.peek(size)
            return data[:size] if len(data) > size else data

        if not hasattr(fileobj, "seek"):  # pragma: no-cover
            # TODO I don't think it's possible to get here, but leaving for now
            raise IOError("Unpeekable file: {}".format(self.name))

        # Emulate peek: read ahead, then always rewind to where we started.
        position = fileobj.tell()
        try:
            return fileobj.read(size)
        finally:
            fileobj.seek(position)
Пример #6
0
 def __init__(self,
              files: FilesArg = None,
              access: ModeAccessArg = 'w',
              char_mode: CharMode = None,
              linesep: CharMode = None,
              encoding: str = 'utf-8',
              header: CharMode = None) -> None:
     """Set up the output state and register any initial files.

     Args:
         files: Files to add via ``add_all``; skipped when falsy.
         access: The file access mode.
         char_mode: Text or binary marker; the file mode coding is 't'
             only when this equals ``TextMode``, otherwise 'b'.
         linesep: The line separator.
         encoding: The character encoding.
         header: Header passed through to the parent initializer.
     """
     coding = 't' if char_mode == TextMode else 'b'
     file_mode = FileMode(access=access, coding=coding)
     super().__init__(mode=file_mode, header=header)

     # Empty value of the matching type: bytes only for BinMode.
     if char_mode == BinMode:
         empty = b''
     else:
         empty = ''

     self.access = access
     self.char_mode = char_mode  # type: CharMode
     self._empty = cast(CharMode, empty)  # type: CharMode
     self.encoding = encoding  # type: str
     self.num_lines = 0  # type: int
     self.linesep = linesep  # type: CharMode
     # NOTE(review): len() raises TypeError if linesep is left as None.
     self._linesep_len = len(linesep)  # type: int
     if files:
         self.add_all(files)
Пример #7
0
 def __init__(self,
              executable_path: PathLike,
              path: PathLike,
              mode: ModeArg = 'w',
              command: List[str] = None,
              executable_name: str = None) -> None:
     """Start a subprocess whose stdout is written to the file at ``path``.

     Args:
         executable_path: Path of the executable; its base name is used as
             the executable name when ``executable_name`` is not given.
         path: Path of the output file; opened with ``mode``.
         mode: The file open mode, as a string or a mode object.
         command: The command to run; defaults to ``[executable_name]``.
         executable_name: Display name of the executable.

     Raises:
         Any exception raised while opening the files or starting the
         process is re-raised after the already-opened files are closed.
     """
     super().__init__(path)
     self.executable_name = (executable_name
                             or os.path.basename(str(executable_path)))
     self.command = command or [self.executable_name]
     if isinstance(mode, str):
         mode = FileMode(mode)
     self.outfile = open(str(path), mode.value)
     try:
         self.devnull = open(os.devnull, 'w')
         try:
             # Data is fed through stdin; the process writes its output
             # to the file and its stderr is discarded.
             self.process = Popen(self.command,
                                  stdin=PIPE,
                                  stdout=self.outfile,
                                  stderr=self.devnull)
         except BaseException:  # pragma: no-cover
             self.devnull.close()
             raise
     except BaseException:  # pragma: no-cover
         # Covers both a failed devnull open and a failed process start, so
         # the output file handle never leaks.
         self.outfile.close()
         raise
Пример #8
0
 def __init__(
     self,
     files: FilesArg = None,
     access: ModeAccessArg = "w",
     char_mode: Optional[CharMode] = None,
     linesep: Optional[CharMode] = None,
     encoding: str = "utf-8",
     header: Optional[CharMode] = None,
 ) -> None:
     """Initialize output bookkeeping and add any files given up front.

     Args:
         files: Files to add via ``add_all``; skipped when falsy.
         access: The file access mode.
         char_mode: Text or binary marker; coding is "t" only when this
             equals ``TextMode``, otherwise "b".
         linesep: The line separator.
         encoding: The character encoding.
         header: Header passed through to the parent initializer.
     """
     is_text = char_mode == TextMode
     super().__init__(
         mode=FileMode(access=access, coding="t" if is_text else "b"),
         header=header,
     )
     # Empty value of the matching type: bytes only for BinMode.
     empty_value = b"" if char_mode == BinMode else ""

     self.access = access
     self.char_mode: CharMode = char_mode
     self._empty: CharMode = cast(CharMode, empty_value)
     self.encoding: str = encoding
     self.num_lines: int = 0
     self.linesep: CharMode = linesep
     # NOTE(review): len() raises TypeError if linesep is left as None.
     self._linesep_len: int = len(linesep)
     if files:
         self.add_all(files)
Пример #9
0
def linecount(path_or_file: PathOrFile,
              linesep: Optional[bytes] = None,
              buffer_size: int = 1024 * 1024,
              **kwargs) -> int:
    """Fastest pythonic way to count the lines in a file.

    Args:
        path_or_file: File object, or path to the file.
        linesep: Line delimiter, specified as a byte string (e.g. b'\\n').
            Defaults to ``os.linesep`` encoded to bytes.
        buffer_size: How many bytes to read at a time (1 Mb by default).
        kwargs: Additional arguments to pass to the file open method. If
            'mode' is given it must be equivalent to 'rb'.

    Returns:
        The number of lines in the file. Blank lines (including the last line
        in the file) are included. Returns 0 for an empty file and -1 if the
        open method yields ``None`` instead of a file object.

    Raises:
        ValueError: If ``buffer_size`` is less than 1 or the requested mode
            is not 'rb'.

    Notes:
        Each buffer is counted independently, so an occurrence of a
        multi-byte ``linesep`` that straddles two buffers is not counted.
    """
    if buffer_size < 1:
        raise ValueError("'buffer_size' must be >= 1")
    if linesep is None:
        linesep = os.linesep.encode()
    if "mode" not in kwargs:
        kwargs["mode"] = "rb"
    elif FileMode(kwargs["mode"]).value != "rb":
        raise ValueError("File must be opened with mode 'rb'")
    with open_(path_or_file, **kwargs) as fileobj:
        if fileobj is None:
            return -1
        read_f = fileobj.read  # loop optimization
        buf = read_f(buffer_size)
        if not buf:  # empty file
            return 0
        # A non-empty file has one more line than it has separators.
        lines = 1
        while buf:
            lines += buf.count(linesep)
            buf = read_f(buffer_size)
        return lines
Пример #10
0
def xopen(
    target: OpenArg,
    mode: ModeArg = None,
    compression: CompressionArg = None,
    use_system: bool = True,
    allow_subprocesses: bool = True,
    context_wrapper: bool = None,
    file_type: FileType = None,
    validate: bool = True,
    overwrite: bool = True,
    close_fileobj: bool = True,
    **kwargs,
) -> FileLike:
    """
    Replacement for the builtin `open` function that can also open URLs and
    subprocesses, and automatically handles compressed files.

    Args:
        target: A relative or absolute path, a URL, a system command, a
            file-like object, or :class:`bytes` or :class:`str` to
            indicate a writeable byte/string buffer.
        mode: Some combination of the access mode ('r', 'w', 'a', or 'x')
            and the open mode ('b' or 't'). If the latter is not given, 't'
            is used by default.
        compression: If None or True, compression type (if any) will be
            determined automatically. If False, no attempt will be made to
            determine compression type. Otherwise this must specify the
            compression type (e.g. 'gz'). See `xphyle.compression` for
            details. Note that compression will *not* be guessed for
            '-' (stdin).
        use_system: Whether to attempt to use system-level compression
            programs.
        allow_subprocesses: Whether to allow `target` to be a subprocess
            (e.g. '|cat'). There are security risks associated with allowing
            users to run arbitrary system commands.
        context_wrapper: If True, the file is wrapped in a `FileLikeWrapper`
            subclass before returning (`FileWrapper` for files/URLs,
            `StdWrapper` for STDIN/STDOUT/STDERR). If None, the default value
            (set using :method:`configure`) is used.
        file_type: a FileType; explicitly specify the file type. By default the
            file type is detected, but auto-detection might make mistakes, e.g.
            a local file contains a colon (':') in the name.
        validate: Ensure that the user-specified compression format matches the
            format guessed from the file extension or magic bytes.
        overwrite: For files opened in write mode, whether to overwrite
            existing files (True).
        close_fileobj: When `target` is a file-like object / `file_type` is
            FileType.FILELIKE, and `context_wrapper` is True, whether to close
            the underlying file when closing the wrapper.
        kwargs: Additional keyword arguments to pass to ``open``.

    `target` is interpreted as follows:
        * If starts with '|', it is assumed to be a system command
        * If a file-like object, it is used as-is
        * If one of STDIN, STDOUT, STDERR, the appropriate `sys` stream is used
        * If parseable by `xphyle.urls.parse_url()`, it is assumed to be a URL
        * If file_type == FileType.BUFFER and target is a string or bytes and
          mode is readable, a new StringIO/BytesIO is created with 'target'
          passed to its constructor.
        * Otherwise it is assumed to be a local file

    If `use_system` is True and the file is compressed, the file is opened with
    a pipe to the system-level compression program (e.g. ``gzip`` for '.gz'
    files) if possible, otherwise the corresponding python library is used.

    Returns:
        A Process if `file_type` is PROCESS, or if `file_type` is None and
        `target` starts with '|'. Otherwise, an opened file-like object. If
        `context_wrapper` is True, this will be a subclass of `FileLikeWrapper`.

    Raises:
        ValueError if:
            * ``compression`` is True and compression format cannot be
              determined
            * the specified compression format is invalid
            * ``validate`` is True and the specified compression format is not
              the actual format of the file
            * the target, file type, or mode are invalid or incompatible
            * `target` is a subprocess and ``allow_subprocesses`` is False
    """
    # Normalize a user-supplied format name/alias to its canonical name.
    if compression and isinstance(compression, str):
        cannonical_fmt_name = FORMATS.get_compression_format_name(compression)
        if cannonical_fmt_name is None:
            raise ValueError(
                "Invalid compression format: {}".format(compression))
        else:
            compression = cannonical_fmt_name

    # Convert placeholder strings ("-", "_") to paths
    target = convert_std_placeholder(target, mode)

    # Whether the file object is stdin/stdout/stderr
    is_std = target in (STDIN, STDOUT, STDERR)
    # Whether 'target' is currently a file-like object in binary mode
    is_bin = False
    # Whether target is a string
    is_str = isinstance(target, str)
    # Whether target is a Path
    is_path = not is_std and isinstance(target, PurePath)
    # Whether target is a class indicating a buffer type
    is_buffer = target in (str, bytes)

    if not file_type:
        # Auto-detect the file type; a plain string that is not a command is
        # left undetermined here and resolved as URL vs. LOCAL below.
        if is_path:
            file_type = FileType.LOCAL
        elif is_std:
            file_type = FileType.STDIO
        elif is_buffer:
            file_type = FileType.BUFFER
        elif not is_str:
            file_type = FileType.FILELIKE
        elif target.startswith("|"):
            file_type = FileType.PROCESS
    elif file_type == FileType.BUFFER and (is_str or is_path
                                           or isinstance(target, bytes)):
        # Explicit BUFFER with literal content: default to a readable mode
        # whose coding matches the content type.
        if not mode:
            mode = FileMode(access="r", coding="t" if is_str else "b")
        is_buffer = True
    elif ((is_str or is_path or is_buffer) == (file_type is FileType.FILELIKE)
          or is_std != (file_type is FileType.STDIO) or is_buffer !=
          (file_type is FileType.BUFFER)):
        # The explicitly requested file type contradicts what target is.
        raise ValueError(
            f"file_type = {file_type} does not match target {target}")

    url_parts = None
    if file_type in (FileType.URL, None):
        url_parts = parse_url(target)
        if not file_type:
            file_type = FileType.URL if url_parts else FileType.LOCAL
        elif not url_parts:
            raise ValueError(f"{target} is not a valid URL")

    if not mode:
        # set to default
        if not is_buffer:
            mode = FileMode()
        elif target == str:
            mode = FileMode("wt")
        else:
            mode = FileMode("wb")
    elif isinstance(mode, str):
        if "U" in mode and "newline" in kwargs and kwargs[
                "newline"] is not None:
            raise ValueError(
                "newline={} not compatible with universal newlines ('U') "
                "mode".format(kwargs["newline"]))
        mode = FileMode(mode)

    if context_wrapper is None:
        context_wrapper = DEFAULTS["xopen_context_wrapper"]

    # Return early if opening a process
    if file_type is FileType.PROCESS:
        if not allow_subprocesses:
            raise ValueError("Subprocesses are disallowed")
        if target.startswith("|"):
            target = target[1:]
        popen_args = dict(kwargs)
        for std in ("stdin", "stdout", "stderr"):
            popen_args[std] = PIPE
        if mode.writable:
            if compression is True:
                raise ValueError(
                    "Cannot determine compression automatically when writing "
                    "to process stdin")
            elif compression is None:
                compression = False
            outstream = "stdin"
        else:
            outstream = "stdout"
        # The stream we will read from/write to gets the xopen arguments
        # instead of a bare PIPE.
        popen_args[outstream] = dict(
            mode=mode,
            compression=compression,
            validate=validate,
            context_wrapper=context_wrapper,
        )
        return popen(target, **popen_args)

    buffer = None

    if file_type is FileType.BUFFER:
        if target == str:
            target = io.StringIO()
        elif target == bytes:
            target = io.BytesIO()
            is_bin = True
        elif is_str or isinstance(target, bytes):
            if not mode.readable:
                raise ValueError(
                    "'mode' must be readable when 'file_type' == BUFFER "
                    "and 'target' is string or bytes.")
            if is_str:
                if mode.coding != ModeCoding.TEXT:
                    raise ValueError("Must use text mode with a string buffer")
                target = io.StringIO(target)
            else:
                if mode.coding != ModeCoding.BINARY:
                    raise ValueError(
                        "Must use binary mode with a bytes buffer")
                target = io.BytesIO(target)
                is_bin = True
        if context_wrapper:
            # Keep a handle on the raw buffer for the BufferWrapper below.
            buffer = target
        if not mode.readable:
            if compression is True:
                raise ValueError(
                    "Cannot guess compression for a write-only buffer")
            elif compression is None:
                compression = False
            validate = False

    # The file handle we will open
    # TODO: figure out the right type
    fileobj: Any = None
    # The name to use for the file
    name = None
    # Guessed compression type, if compression in (None, True)
    guess = None
    # Whether to try and guess file format
    guess_format = compression in (None, True)
    # Whether to validate that the actual compression format matches expected
    validate = validate and bool(compression) and not guess_format

    if file_type is FileType.STDIO:
        use_system = False
        if target == STDERR:
            if not mode.writable:
                raise ValueError("Mode must be writable for stderr")
            stdobj = sys.stderr
        else:
            stdobj = sys.stdin if mode.readable else sys.stdout

        # whether we need the underlying byte stream regardless of the mode
        check_readable = mode.readable and (validate or guess_format)

        if mode.binary or compression or check_readable:
            # get the underlying binary stream
            fileobj = stdobj.buffer
            is_bin = True
        else:
            fileobj = stdobj

        if check_readable:
            if not hasattr(fileobj, "peek"):
                fileobj = io.BufferedReader(fileobj)
            guess = FORMATS.guess_format_from_buffer(fileobj)
        else:
            validate = False
    elif file_type in (FileType.FILELIKE, FileType.BUFFER):
        fileobj = target
        use_system = False

        # determine mode of fileobj
        if hasattr(fileobj, "mode"):
            fileobj_mode = FileMode(target.mode)
        elif hasattr(fileobj, "readable"):
            access = ModeAccess.READWRITE
            # if fileobj.readable and fileobj.writable:
            #     access = ModeAccess.READWRITE
            # elif fileobj.writable:
            #     access = ModeAccess.WRITE
            # else:
            #     access = ModeAccess.READ
            fileobj_mode = FileMode(
                access=access,
                coding="t" if hasattr(fileobj, "encoding") else "b")
        else:  # pragma: no-cover
            # TODO I don't think we can actually get here, but leaving for now.
            raise ValueError("Cannot determine file mode")

        # make sure modes are compatible
        if not ((mode.readable and fileobj_mode.readable) or
                (mode.writable and fileobj_mode.writable)):
            raise ValueError(
                "mode {} and file mode {} are not compatible".format(
                    mode, fileobj_mode))

        # compression/decompression only possible for binary files
        is_bin = fileobj_mode.binary
        if not is_bin:
            if compression:
                raise ValueError(
                    "Cannot compress to/decompress from a text-mode "
                    "file/buffer")
            else:
                # noinspection PyUnusedLocal
                guess_format = False
        elif validate or guess_format:
            if mode.readable:
                if not hasattr(fileobj, "peek"):
                    fileobj = io.BufferedReader(fileobj)
                guess = FORMATS.guess_format_from_buffer(fileobj)
            elif hasattr(fileobj, "name") and isinstance(fileobj.name, str):
                guess = FORMATS.guess_compression_format(fileobj.name)
            else:
                raise ValueError(
                    "Could not guess compression format from {}".format(
                        target))
    elif file_type is FileType.URL:
        if not mode.readable:
            raise ValueError("URLs can only be opened in read mode")

        fileobj = open_url(target)
        if not fileobj:
            raise ValueError("Could not open URL {}".format(target))

        use_system = False
        name = get_url_file_name(fileobj, url_parts)

        # Get compression format if not specified
        if validate or guess_format:
            guess = FORMATS.guess_format_from_buffer(fileobj)
            # The following code is never used, unless there is some
            # scenario in which the file type cannot be guessed from
            # the header bytes. I'll leave this here for now but keep
            # it commented out until someone provides an example of
            # why it's necessary.
            # if guess is None and guess_format:
            #     # Check if the MIME type indicates that the file is
            #     # compressed
            #     mime = get_url_mime_type(fileobj)
            #     if mime:
            # TODO: look at this https://github.com/dbtsai/python-mimeparse
            # or similar for mime parsing
            #         guess = get_format_for_mime_type(mime)
            #     # Try to guess from the file name
            #     if not guess and name:
            #         guess = guess_file_format(name)
    elif file_type is FileType.LOCAL:
        if is_str:
            target = Path(target)
        if mode.readable:
            target = check_readable_file(target)
            if validate or guess_format:
                guess = FORMATS.guess_format_from_file_header(target)
        else:
            target = check_writable_file(target)
            # If overwrite=False, check that the file doesn't already exist
            if not overwrite and os.path.exists(target):
                raise ValueError("File already exists: {}".format(target))
            if validate or guess_format:
                guess = FORMATS.guess_compression_format(target)

    if validate and guess != compression:
        # TODO: this is to handle the case where the same extension can be used for
        # multiple compression formats, and we're writing a file so the format cannot
        # be detected from the header. Formats currently does not support an extension
        # being used with multiple formats. Currently bgzip is the only format that has
        # this issue.
        if not mode.readable and FORMATS.has_compatible_extension(
                compression, guess):
            pass
        else:
            raise ValueError(
                "Actual compression format {} is not compatible with expected "
                "format {}".format(guess, compression))
    elif guess:
        compression = guess
    elif compression is True:
        raise ValueError(f"Could not guess compression format from {target}")

    if compression:
        fmt = FORMATS.get_compression_format(str(compression))
        compression = fmt.name
        fileobj = fmt.open_file(fileobj or target,
                                mode,
                                use_system=use_system,
                                **kwargs)
        is_std = False
    elif not fileobj:
        fileobj = open(target, mode.value, **kwargs)
    elif mode.text and is_bin and (is_std or file_type is FileType.FILELIKE):
        # Text was requested but we hold a binary stream: layer text I/O on it.
        fileobj = io.TextIOWrapper(fileobj)
        fileobj.mode = mode.value

    if context_wrapper:
        if is_std:
            fileobj = StdWrapper(fileobj, compression=compression)
        elif file_type == FileType.BUFFER:
            fileobj = BufferWrapper(fileobj,
                                    buffer,
                                    compression=compression,
                                    close_fileobj=close_fileobj)
        else:
            fileobj = FileWrapper(
                fileobj,
                name=name,
                mode=mode,
                compression=compression,
                close_fileobj=close_fileobj,
            )

    return fileobj
Пример #11
0
def xopen(
        path,  #: OpenArg,
        mode: ModeArg = None,
        compression: CompressionArg = None,
        use_system: bool = True,
        context_wrapper: bool = None,
        file_type: FileType = None,
        validate: bool = True,
        **kwargs) -> FileLike:
    """
    Replacement for the builtin `open` function that can also open URLs and
    subprocesses, and automatically handles compressed files.

    Args:
        path: A relative or absolute path, a URL, a system command, a
            file-like object, or :class:`bytes` or :class:`str` to
            indicate a writeable byte/string buffer.
        mode: Some combination of the access mode ('r', 'w', 'a', or 'x')
            and the open mode ('b' or 't'). If the latter is not given, 't'
            is used by default.
        compression: If None or True, compression type (if any) will be
            determined automatically. If False, no attempt will be made to
            determine compression type. Otherwise this must specify the
            compression type (e.g. 'gz'). See `xphyle.compression` for
            details. Note that compression will *not* be guessed for
            '-' (stdin).
        use_system: Whether to attempt to use system-level compression
            programs.
        context_wrapper: If True, the file is wrapped in a `FileLikeWrapper`
            subclass before returning (`FileWrapper` for files/URLs,
            `StdWrapper` for STDIN/STDOUT/STDERR, `BufferWrapper` for
            buffers). If None, the default value (set using
            :method:`configure`) is used.
        file_type: a FileType; explicitly specify the file type. By default the
            file type is detected, but auto-detection might make mistakes, e.g.
            a local file contains a colon (':') in the name.
        validate: Ensure that the user-specified compression format matches the
            format guessed from the file extension or magic bytes.
        kwargs: Additional keyword arguments to pass to ``open``.

    `path` is interpreted as follows:
        * If starts with '|', it is assumed to be a system command
        * If a file-like object, it is used as-is
        * If one of STDIN, STDOUT, STDERR, the appropriate `sys` stream is used
        * If parseable by `xphyle.urls.parse_url()`, it is assumed to be a URL
        * If file_type == FileType.BUFFER and path is a string or bytes and
          mode is readable, a new StringIO/BytesIO is created with 'path' passed
          to its constructor.
        * Otherwise it is assumed to be a local file

    If `use_system` is True and the file is compressed, the file is opened with
    a pipe to the system-level compression program (e.g. ``gzip`` for '.gz'
    files) if possible, otherwise the corresponding python library is used.

    Returns:
        A Process if `file_type` is PROCESS, or if `file_type` is None and
        `path` starts with '|'. Otherwise, an opened file-like object. If
        `context_wrapper` is True, this will be a subclass of `FileLikeWrapper`.

    Raises:
        ValueError if:
            * ``compression`` is True and compression format cannot be
              determined
            * the specified compression format is invalid
            * ``validate`` is True and the specified compression format is not
              the actual format of the file
            * the path, mode, or ``file_type`` are invalid or inconsistent
              with each other
    """
    # Normalize a user-supplied format name (e.g. an alias) to its canonical
    # name so that it can later be compared against the guessed format.
    if compression and isinstance(compression, str):
        canonical_fmt_name = FORMATS.get_compression_format_name(compression)
        if canonical_fmt_name is None:
            raise ValueError(
                "Invalid compression format: {}".format(compression))
        compression = canonical_fmt_name

    # Whether the file object is stdin/stdout/stderr
    is_std = path in (STDIN, STDOUT, STDERR)
    # Whether path is a string or fileobj
    is_str = isinstance(path, str)
    # Whether path is a class indicating a buffer type
    is_buffer = path in (str, bytes)

    if not file_type:
        # Auto-detect what we can now; URL vs. LOCAL is resolved below.
        if is_std:
            file_type = FileType.STDIO
        elif is_buffer:
            file_type = FileType.BUFFER
        elif not is_str:
            file_type = FileType.FILELIKE
        elif path.startswith('|'):
            file_type = FileType.PROCESS
    elif file_type == FileType.BUFFER and (is_str or isinstance(path, bytes)):
        # A str/bytes *value* with an explicit BUFFER type is the initial
        # content of a readable buffer.
        if not mode:
            mode = FileMode(access='r', coding='t' if is_str else 'b')
        is_buffer = True
    elif (
            # Strings can never be FILELIKE and non-strings must be FILELIKE.
            # The buffer classes (``str``/``bytes``) are neither, so they are
            # exempt from this check; previously an explicit
            # ``file_type=FileType.BUFFER`` with ``path=str`` was rejected
            # here even though the same call without ``file_type`` works.
            (not is_buffer and is_str == (file_type is FileType.FILELIKE))
            or is_std != (file_type is FileType.STDIO)
            or is_buffer != (file_type is FileType.BUFFER)):
        raise ValueError("file_type = {} does not match path {}".format(
            file_type, path))

    if file_type in (FileType.URL, None):
        url_parts = parse_url(path)
        if not file_type:
            file_type = FileType.URL if url_parts else FileType.LOCAL
        elif not url_parts:
            raise ValueError("{} is not a valid URL".format(path))

    if not mode:
        # set to default
        if not is_buffer:
            mode = FileMode()
        elif path == str:
            mode = FileMode('wt')
        else:
            mode = FileMode('wb')
    elif isinstance(mode, str):
        if ('U' in mode and 'newline' in kwargs
                and kwargs['newline'] is not None):
            raise ValueError(
                "newline={} not compatible with universal newlines ('U') "
                "mode".format(kwargs['newline']))
        mode = FileMode(mode)

    if context_wrapper is None:
        context_wrapper = DEFAULTS['xopen_context_wrapper']

    # Return early if opening a process
    if file_type is FileType.PROCESS:
        if path.startswith('|'):
            path = path[1:]
        popen_args = dict(kwargs)
        # All three standard streams are piped; the one we read from/write to
        # is replaced below with the arguments describing how to open it.
        for std in ('stdin', 'stdout', 'stderr'):
            popen_args[std] = PIPE
        if mode.writable:
            if compression is True:
                raise ValueError(
                    "Cannot determine compression automatically when writing "
                    "to process stdin")
            elif compression is None:
                compression = False
            target = 'stdin'
        else:
            target = 'stdout'
        popen_args[target] = dict(mode=mode,
                                  compression=compression,
                                  validate=validate,
                                  context_wrapper=context_wrapper)
        return popen(path, **popen_args)

    if file_type is FileType.BUFFER:
        if path == str:
            path = io.StringIO()
        elif path == bytes:
            path = io.BytesIO()
        elif is_str or isinstance(path, bytes):
            if not mode.readable:
                raise ValueError(
                    "'mode' must be readable when 'file_type' == BUFFER "
                    "and 'path' is string or bytes.")
            if is_str:
                if mode.coding != ModeCoding.TEXT:
                    raise ValueError("Must use text mode with a string buffer")
                path = io.StringIO(path)
            else:
                if mode.coding != ModeCoding.BINARY:
                    raise ValueError(
                        "Must use binary mode with a bytes buffer")
                path = io.BytesIO(path)
        if context_wrapper:
            # Keep a handle on the raw buffer for the BufferWrapper, since
            # `fileobj` may end up being a wrapper around it.
            buffer = path
        if not mode.readable:
            # Nothing to inspect in a write-only buffer, so the format can
            # neither be guessed nor validated.
            if compression is True:
                raise ValueError(
                    "Cannot guess compression for a write-only buffer")
            elif compression is None:
                compression = False
            validate = False

    # The file handle we will open
    # TODO: figure out the right type
    fileobj = None  # type: Any
    # The name to use for the file
    name = None
    # Guessed compression type, if compression in (None, True)
    guess = None
    # Whether to try and guess file format
    guess_format = compression in (None, True)
    # Whether to validate that the actual compression format matches expected
    validate = validate and bool(compression) and not guess_format

    if file_type is FileType.STDIO:
        use_system = False
        if path == STDERR:
            if not mode.writable:
                raise ValueError("Mode must be writable for stderr")
            stdobj = sys.stderr
        else:
            stdobj = sys.stdin if mode.readable else sys.stdout
        # get the underlying binary stream
        fileobj = stdobj.buffer
        if mode.readable and (validate or guess_format):
            # Guessing the format requires peeking at the stream header.
            if not hasattr(fileobj, 'peek'):
                fileobj = io.BufferedReader(fileobj)
            guess = FORMATS.guess_format_from_buffer(fileobj)
        else:
            validate = False
    elif file_type in (FileType.FILELIKE, FileType.BUFFER):
        fileobj = path
        use_system = False

        # determine mode of fileobj
        if hasattr(fileobj, 'mode'):
            fileobj_mode = FileMode(path.mode)
        elif hasattr(fileobj, 'readable'):
            # NOTE: the access mode is assumed to be read/write rather than
            # being probed from the object's readable/writable attributes.
            access = ModeAccess.READWRITE
            fileobj_mode = FileMode(
                access=access,
                coding='t' if hasattr(fileobj, 'encoding') else 'b')
        else:  # pragma: no-cover
            # TODO I don't think we can actually get here, but leaving for now.
            raise ValueError("Cannot determine file mode")

        # make sure modes are compatible
        if not ((mode.readable and fileobj_mode.readable) or
                (mode.writable and fileobj_mode.writable)):
            raise ValueError(
                "mode {} and file mode {} are not compatible".format(
                    mode, fileobj_mode))

        # compression/decompression only possible for binary files
        if fileobj_mode.text:
            if compression:
                raise ValueError(
                    "Cannot compress to/decompress from a text-mode "
                    "file/buffer")
            else:
                guess_format = False
        elif validate or guess_format:
            if mode.readable:
                if not hasattr(fileobj, 'peek'):
                    fileobj = io.BufferedReader(fileobj)
                guess = FORMATS.guess_format_from_buffer(fileobj)
            elif hasattr(fileobj, 'name') and isinstance(fileobj.name, str):
                # Write-only: fall back to the name of the file object.
                guess = FORMATS.guess_compression_format(fileobj.name)
            else:
                raise ValueError(
                    "Could not guess compression format from {}".format(path))
    elif file_type is FileType.URL:
        if not mode.readable:
            raise ValueError("URLs can only be opened in read mode")

        fileobj = open_url(path)
        if not fileobj:
            raise ValueError("Could not open URL {}".format(path))

        use_system = False
        name = get_url_file_name(fileobj, url_parts)

        # Get compression format if not specified
        if validate or guess_format:
            guess = FORMATS.guess_format_from_buffer(fileobj)
            # TODO: if an example turns up where the format cannot be guessed
            # from the header bytes, fall back to the MIME type of the
            # response (see https://github.com/dbtsai/python-mimeparse or
            # similar for MIME parsing) and then to the file name.
    elif file_type is FileType.LOCAL:
        if mode.readable:
            path = check_readable_file(path)
            if validate or guess_format:
                guess = FORMATS.guess_format_from_file_header(path)
        else:
            path = check_writable_file(path)
            if validate or guess_format:
                guess = FORMATS.guess_compression_format(path)

    if validate and guess != compression:
        raise ValueError(
            "Acutal compression format {} does not match expected "
            "format {}".format(guess, compression))
    elif guess:
        compression = guess
    elif compression is True:
        raise ValueError(
            "Could not guess compression format from {}".format(path))

    if compression:
        fmt = FORMATS.get_compression_format(str(compression))
        compression = fmt.name
        fileobj = fmt.open_file(fileobj or path,
                                mode,
                                use_system=use_system,
                                **kwargs)
        # The compressed stream is no longer a plain std stream, so it gets a
        # FileWrapper (not a StdWrapper) below.
        is_std = False
    elif not fileobj:
        fileobj = open(path, mode.value, **kwargs)
    elif mode.text and (is_std or (file_type is FileType.FILELIKE
                                   and not fileobj_mode.text)):
        # Text mode was requested on top of a binary stream.
        fileobj = io.TextIOWrapper(fileobj)
        fileobj.mode = mode.value

    if context_wrapper:
        if is_std:
            fileobj = StdWrapper(fileobj, compression=compression)
        elif file_type == FileType.BUFFER:
            fileobj = BufferWrapper(fileobj, buffer, compression=compression)
        else:
            fileobj = FileWrapper(fileobj,
                                  name=name,
                                  mode=mode,
                                  compression=compression)

    return fileobj