Exemplo n.º 1
0
def _safe_load_json(path: str, **kwargs: Any) -> pd.DataFrame:
    """Read JSON from ``path`` into a pandas DataFrame.

    ``path`` is first passed straight to ``pd.read_json``. If that raises
    ``IsADirectoryError`` or ``PermissionError``, ``path`` is opened as a
    directory instead: every entry matching ``*.json`` is read with the same
    options and the resulting frames are concatenated with ``pd.concat``.

    :param path: location handed to ``pd.read_json``, or a directory holding
      ``*.json`` entries
    :param kwargs: extra ``pd.read_json`` options; they take precedence over
      the defaults ``orient="records"`` and ``lines=True``
    :return: the loaded DataFrame
    """
    # Defaults first, caller-supplied options win on conflict.
    options = dict(orient="records", lines=True)
    options.update(kwargs)
    try:
        return pd.read_json(path, **options)
    except (IsADirectoryError, PermissionError):
        # NOTE(review): presumably these are the errors seen when `path` is a
        # directory rather than a single file -- confirm per platform.
        storage = FileSystem()
        frames = []
        for entry in storage.opendir(path).glob("*.json"):
            # Only the entry's base name is kept and re-joined onto `path`.
            part_path = pfs.path.join(path, os.path.basename(entry.path))
            frames.append(pd.read_json(part_path, **options))
        return pd.concat(frames)
Exemplo n.º 2
0
def _load_avro(p: FileParser,
               columns: Any = None,
               **kwargs: Any) -> Tuple[pd.DataFrame, Any]:
    """Load Avro data found at ``p.uri`` and optionally select columns.

    ``p.uri`` is first given to ``_load_single_avro``. If that raises
    ``IsADirectoryError``, ``PermissionError`` or ``FileExpected``, the URI is
    opened as a directory, every entry matching ``*.avro`` is loaded with
    ``_load_single_avro`` and the frames are concatenated with ``pd.concat``.

    :param p: parser object; only its ``uri`` attribute is read here
    :param columns: ``None`` keeps the frame as loaded; a ``list`` is used
      directly as the column selection; any other value is passed to
      ``Schema`` and the frame is narrowed to ``schema.names``
    :param kwargs: forwarded unchanged to every ``_load_single_avro`` call
    :return: ``(dataframe, schema)`` where ``schema`` is the constructed
      ``Schema`` in the last case above and ``None`` otherwise
    """
    uri = p.uri
    try:
        frame = _load_single_avro(uri, **kwargs)
    except (IsADirectoryError, PermissionError, FileExpected):
        # NOTE(review): presumably raised when the URI is a directory rather
        # than a single file -- confirm against _load_single_avro.
        storage = FileSystem()
        parts = []
        for entry in storage.opendir(uri).glob("*.avro"):
            # Only the entry's base name is kept and re-joined onto the URI.
            part_uri = pfs.path.join(uri, os.path.basename(entry.path))
            parts.append(_load_single_avro(part_uri, **kwargs))
        frame = pd.concat(parts)

    schema = None
    if columns is not None:
        if isinstance(columns, list):
            # A plain list is treated as column names; no schema is built.
            frame = frame[columns]
        else:
            schema = Schema(columns)
            frame = frame[schema.names]
    return frame, schema
Exemplo n.º 3
0
 def load_dir() -> pd.DataFrame:
     """Read every ``*.csv`` entry under ``path`` and concatenate the frames.

     ``path`` and ``kwargs`` are not parameters: they are resolved from the
     surrounding scope, which is not visible here. ``kwargs`` is forwarded
     unchanged to each ``pd.read_csv`` call.

     :return: the result of ``pd.concat`` over the per-entry frames
     """
     storage = FileSystem()
     frames = []
     for entry in storage.opendir(path).glob("*.csv"):
         # Only the entry's base name is kept and re-joined onto `path`.
         csv_path = pfs.path.join(path, os.path.basename(entry.path))
         frames.append(pd.read_csv(csv_path, **kwargs))
     return pd.concat(frames)