def _safe_load_json(path: str, **kwargs: Any) -> pd.DataFrame:
    """Read JSON from ``path``, falling back to a directory of ``*.json`` files.

    ``pd.read_json`` is called with ``orient="records"`` and ``lines=True``
    unless the caller overrides either of them through ``kwargs``.

    When reading ``path`` directly raises ``IsADirectoryError`` or
    ``PermissionError``, ``path`` is opened as a directory instead: every
    entry matching ``*.json`` is read with the same options and the
    resulting frames are concatenated with ``pd.concat``.

    Args:
        path: location of a JSON file, or of a directory of JSON files.
        **kwargs: extra options forwarded to ``pd.read_json``; they take
            precedence over the defaults above.

    Returns:
        The loaded data as a single pandas DataFrame.
    """
    options = dict(orient="records", lines=True)
    options.update(kwargs)  # caller-supplied options win over the defaults
    try:
        return pd.read_json(path, **options)
    except (IsADirectoryError, PermissionError):
        # NOTE(review): PermissionError is presumably caught because some
        # platforms raise it when a directory is opened as a file -- confirm.
        folder = FileSystem().opendir(path)
        frames = []
        for match in folder.glob("*.json"):
            # Rebuild the member path from the base path plus the file name.
            member = pfs.path.join(path, os.path.basename(match.path))
            frames.append(pd.read_json(member, **options))
        return pd.concat(frames)
def _load_avro(
    p: FileParser, columns: Any = None, **kwargs: Any
) -> Tuple[pd.DataFrame, Any]:
    """Load Avro data from ``p.uri`` and optionally select columns.

    The URI is first passed to ``_load_single_avro``. If that raises
    ``IsADirectoryError``, ``PermissionError`` or ``FileExpected``, the URI
    is opened as a directory: every entry matching ``*.avro`` is loaded
    with the same keyword arguments and the frames are concatenated.

    Args:
        p: parser object; only its ``uri`` attribute is read here.
        columns: ``None`` keeps every column; a ``list`` is used directly
            to index the frame; any other value is passed to ``Schema``
            and the resulting ``names`` select the columns.
        **kwargs: forwarded unchanged to each ``_load_single_avro`` call.

    Returns:
        A ``(frame, schema)`` tuple. ``schema`` is ``None`` unless
        ``columns`` was converted into a ``Schema``.
    """
    location = p.uri
    try:
        frame = _load_single_avro(location, **kwargs)
    except (IsADirectoryError, PermissionError, FileExpected):
        listing = FileSystem().opendir(location)
        parts = []
        for entry in listing.glob("*.avro"):
            # Rebuild the member path from the base URI plus the file name.
            member = pfs.path.join(location, os.path.basename(entry.path))
            parts.append(_load_single_avro(member, **kwargs))
        frame = pd.concat(parts)

    if columns is None:
        selected, schema = frame, None
    elif isinstance(columns, list):
        # A plain list holds column names; no schema is produced.
        selected, schema = frame[columns], None
    else:
        schema = Schema(columns)
        selected = frame[schema.names]
    return selected, schema
def load_dir() -> pd.DataFrame:
    """Read every ``*.csv`` entry under ``path`` into one DataFrame.

    ``path`` and ``kwargs`` are not parameters: both are looked up from the
    surrounding scope at call time. Each matching entry is read with
    ``pd.read_csv(..., **kwargs)`` and the frames are concatenated with
    ``pd.concat``.

    Returns:
        The concatenation of all frames read from the directory.
    """
    listing = FileSystem().opendir(path)
    frames = []
    for entry in listing.glob("*.csv"):
        # Rebuild the member path from the base path plus the file name.
        member = pfs.path.join(path, os.path.basename(entry.path))
        frames.append(pd.read_csv(member, **kwargs))
    return pd.concat(frames)