示例#1
0
文件: _json.py 项目: llawall/pandas
    def _get_data_from_filepath(self, filepath_or_buffer):
        """
        Normalise the ``read_json`` input before parsing.

        ``read_json`` can be handed one of three things:
            1. a file path (string-like)
            2. a file-like object (e.g. open file object, StringIO)
            3. a literal JSON string

        Case (1) is opened through ``get_handle`` so that it becomes case (2);
        cases (2) and (3) come back untouched. Whenever ``get_handle`` is
        used, its result is kept on ``self.handles``.
        """
        source = stringify_path(filepath_or_buffer)

        # A str that is neither a URL, nor an fsspec URL, nor an existing file
        # cannot be opened, so it is assumed to already be JSON text.
        looks_like_json_text = isinstance(source, str) and not (
            is_url(source) or is_fsspec_url(source) or file_exists(source)
        )
        if looks_like_json_text:
            return source

        self.handles = get_handle(
            source,
            "r",
            encoding=self.encoding,
            compression=self.compression,
            storage_options=self.storage_options,
            errors=self.encoding_errors,
        )
        return self.handles.handle
示例#2
0
文件: html.py 项目: ukarroum/pandas
def _read(
    obj: bytes | FilePath | ReadBuffer[str] | ReadBuffer[bytes], encoding: str | None
) -> str | bytes:
    """
    Return the raw content behind a url, an existing file, a buffer or a literal.

    Parameters
    ----------
    obj : str, bytes, or file-like object
        Opened through ``get_handle`` and read when it is a url, has a
        ``read`` attribute, or is a str naming an existing file. Any other
        str or bytes value is treated as the content itself.
    encoding : str or None
        Forwarded to ``get_handle`` when ``obj`` has to be opened.

    Returns
    -------
    raw_text : str or bytes

    Raises
    ------
    TypeError
        If ``obj`` cannot be opened and is neither str nor bytes.
    """
    needs_opening = (
        is_url(obj)
        or hasattr(obj, "read")
        or (isinstance(obj, str) and file_exists(obj))
    )
    if needs_opening:
        # get_handle's signature does not list bytes, hence the type: ignore
        # on the call below.
        with get_handle(
            obj, "r", encoding=encoding  # type: ignore[arg-type]
        ) as handles:
            contents: str | bytes = handles.handle.read()
        return contents

    if not isinstance(obj, (str, bytes)):
        raise TypeError(f"Cannot read object of type '{type(obj).__name__}'")

    # Literal markup passed directly by the caller.
    return obj
示例#3
0
文件: _json.py 项目: tnir/pandas
    def _get_data_from_filepath(self, filepath_or_buffer):
        """
        The function read_json accepts three input types:
            1. filepath (string-like)
            2. file-like object (e.g. open file object, StringIO)
            3. JSON string

        This method turns (1) into (2) to simplify the rest of the processing.
        It returns input types (2) and (3) unchanged. Whenever the input is
        opened, the result of ``get_handle`` is kept on ``self.handles``.

        It raises FileNotFoundError if the input is a string ending in
        one of .json, .json.gz, .json.bz2, etc. but no such file exists.
        """
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if (
            not isinstance(filepath_or_buffer, str)
            or is_url(filepath_or_buffer)
            or is_fsspec_url(filepath_or_buffer)
            or file_exists(filepath_or_buffer)
        ):
            self.handles = get_handle(
                filepath_or_buffer,
                "r",
                encoding=self.encoding,
                compression=self.compression,
                storage_options=self.storage_options,
                errors=self.encoding_errors,
            )
            return self.handles.handle

        # Reaching this point already proves the input is a str that is not a
        # URL and does not name an existing file, so neither the type nor the
        # file system needs to be checked a second time.
        json_suffixes = (".json",) + tuple(
            f".json{ext}" for ext in _extension_to_compression
        )
        if filepath_or_buffer.lower().endswith(json_suffixes):
            # The string looks like a JSON file name rather than JSON text.
            raise FileNotFoundError(f"File {filepath_or_buffer} does not exist")

        # Otherwise the string is assumed to be the JSON document itself.
        return filepath_or_buffer
示例#4
0
def get_data_from_filepath(
    filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str],
    encoding,
    compression: CompressionOptions,
    storage_options: StorageOptions,
) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]:
    """
    Extract raw XML data.

    The method accepts three input types:
        1. filepath (string-like)
        2. file-like object (e.g. open file object, StringIO)
        3. XML string or bytes

    Input (1) is opened with ``get_handle`` and the content read from it is
    returned. Input types (2) and (3) are returned unchanged.

    Parameters
    ----------
    filepath_or_buffer : str, path object, bytes, or file-like object
        The XML source in one of the three forms listed above.
    encoding
        Forwarded to ``get_handle`` when a path or URL is opened.
    compression
        Forwarded to ``get_handle`` when a path or URL is opened.
    storage_options
        Forwarded to ``get_handle`` when a path or URL is opened.

    Returns
    -------
    str, bytes, or file-like object
        The content read from the path or URL, or the input itself when it
        is not something that gets opened here.
    """
    if not isinstance(filepath_or_buffer, bytes):
        filepath_or_buffer = stringify_path(filepath_or_buffer)

    # Only a str can name a location to open. A str starting with "<" (which
    # also covers an "<?xml" declaration) is treated as literal XML and is
    # never probed as a URL or file.
    if (
        isinstance(filepath_or_buffer, str)
        and not filepath_or_buffer.startswith("<")
        and (
            is_url(filepath_or_buffer)
            or is_fsspec_url(filepath_or_buffer)
            or file_exists(filepath_or_buffer)
        )
    ):
        with get_handle(
            filepath_or_buffer,
            "r",
            encoding=encoding,
            compression=compression,
            storage_options=storage_options,
        ) as handle_obj:
            # Read eagerly: the handle is closed on leaving the ``with``
            # block. If the handle has no ``read`` attribute it is returned
            # as-is.
            filepath_or_buffer = (
                # error: Incompatible types in assignment (expression has type
                # "Union[str, IO[str]]", variable has type "Union[Union[str,
                # PathLike[str]], bytes, ReadBuffer[bytes], ReadBuffer[str]]")
                handle_obj.handle.read()  # type: ignore[assignment]
                if hasattr(handle_obj.handle, "read")
                else handle_obj.handle
            )

    return filepath_or_buffer
示例#5
0
def get_data_from_filepath(
    filepath_or_buffer,
    encoding,
    compression,
    storage_options,
) -> Union[str, bytes, Buffer]:
    """
    Extract raw XML data.

    The method accepts three input types:
        1. filepath (string-like)
        2. file-like object (e.g. open file object, StringIO)
        3. XML string or bytes

    Input (1) is opened with ``get_handle`` and the content read from it is
    returned. Input types (2) and (3) are returned unchanged.

    Parameters
    ----------
    filepath_or_buffer : str, path object, bytes, or file-like object
        The XML source in one of the three forms listed above.
    encoding
        Forwarded to ``get_handle`` when a path or URL is opened.
    compression
        Forwarded to ``get_handle`` when a path or URL is opened.
    storage_options
        Forwarded to ``get_handle`` when a path or URL is opened.

    Returns
    -------
    str, bytes, or file-like object
        The content read from the path or URL, or the input itself when it
        is not something that gets opened here.
    """
    filepath_or_buffer = stringify_path(filepath_or_buffer)

    # Only a str can name a location to open. A str starting with "<" (which
    # also covers an "<?xml" declaration) is treated as literal XML and is
    # never probed as a URL or file.
    if (
        isinstance(filepath_or_buffer, str)
        and not filepath_or_buffer.startswith("<")
        and (
            is_url(filepath_or_buffer)
            or is_fsspec_url(filepath_or_buffer)
            or file_exists(filepath_or_buffer)
        )
    ):
        with get_handle(
            filepath_or_buffer,
            "r",
            encoding=encoding,
            compression=compression,
            storage_options=storage_options,
        ) as handle_obj:
            # Read eagerly: the handle is closed on leaving the ``with``
            # block. If the handle has no ``read`` attribute it is returned
            # as-is.
            filepath_or_buffer = (
                handle_obj.handle.read()
                if hasattr(handle_obj.handle, "read")
                else handle_obj.handle
            )

    return filepath_or_buffer