def write_data_description(entityset, path, profile_name=None, **kwargs):
    '''Serialize entityset to data description and write to disk or S3 path.

    Args:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
        path (str) : Location on disk or S3 path to write `data_description.json` and entity data.
        profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and
            search for AWS credentials. Set to False to use an anonymous profile.
        kwargs (keywords) : Additional keyword arguments forwarded to `dump_data_description`,
            the underlying serialization method.

    Raises:
        ValueError: If `path` is recognized as a URL that is not an S3 path.
    '''
    if _is_s3(path):
        # Serialize into a scratch directory, archive it, then upload the archive.
        with tempfile.TemporaryDirectory() as tmpdir:
            os.makedirs(os.path.join(tmpdir, 'data'))
            dump_data_description(entityset, tmpdir, **kwargs)
            file_path = create_archive(tmpdir)

            if isinstance(profile_name, str):
                # Explicit profile: hand a session for that profile to the uploader.
                transport_params = {'session': boto3.Session(profile_name=profile_name)}
                use_smartopen_es(file_path, path, transport_params, read=False)
            elif profile_name is False:
                use_s3fs_es(file_path, path, read=False)
            elif boto3.Session().get_credentials() is not None:
                # The default session is only built here, where its credentials
                # are actually consulted; the explicit-profile and anonymous
                # branches above never need it.
                use_smartopen_es(file_path, path, read=False)
            else:
                # No default credentials were found.
                use_s3fs_es(file_path, path, read=False)
    elif _is_url(path):
        raise ValueError("Writing to URLs is not supported")
    else:
        path = os.path.abspath(path)
        os.makedirs(os.path.join(path, 'data'), exist_ok=True)
        dump_data_description(entityset, path, **kwargs)
def read_entityset(path, profile_name=None, **kwargs):
    '''Read entityset from disk, S3 path, or URL.

    Args:
        path (str): Directory on disk, S3 path, or URL to read `data_description.json`.
        profile_name (str, bool): The AWS profile specified to read from S3. Will default to None and
            search for AWS credentials. Set to False to use an anonymous profile.
            Ignored when `path` is a local directory or a plain URL.
        kwargs (keywords): Additional keyword arguments to pass as keyword arguments to the
            underlying deserialization method.

    Returns:
        The value returned by `description_to_entityset` for the loaded data
        description (presumably an :class:`.EntitySet`).

    Raises:
        ValueError: If a downloaded archive contains a member or link that
            would be placed outside of the temporary extraction directory.
    '''
    if _is_url(path) or _is_s3(path):
        # Remote sources are downloaded as an archive and unpacked in a scratch directory.
        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = Path(path).name
            file_path = os.path.join(tmpdir, file_name)

            if _is_url(path):
                use_smartopen_es(file_path, path)
            elif isinstance(profile_name, str):
                # Explicit profile: hand a session for that profile to the downloader.
                transport_params = {'session': boto3.Session(profile_name=profile_name)}
                use_smartopen_es(file_path, path, transport_params)
            elif profile_name is False:
                use_s3fs_es(file_path, path)
            elif boto3.Session().get_credentials() is not None:
                # The default session is only built here, where its credentials
                # are actually consulted; URL, explicit-profile and anonymous
                # reads never need it.
                use_smartopen_es(file_path, path)
            else:
                # No default credentials were found.
                use_s3fs_es(file_path, path)

            _extract_archive(file_path, tmpdir)

            data_description = read_data_description(tmpdir)
            return description_to_entityset(data_description, **kwargs)
    else:
        data_description = read_data_description(path)
        return description_to_entityset(data_description, **kwargs)


def _extract_archive(archive_path, destination):
    '''Unpack the tar archive at `archive_path` into `destination`, refusing
    members whose name or link target resolves outside of `destination`.

    Raises:
        ValueError: If a member name or link target escapes `destination`.
    '''
    root = os.path.realpath(destination)

    def _escapes(candidate):
        resolved = os.path.realpath(candidate)
        return resolved != root and not resolved.startswith(root + os.sep)

    with tarfile.open(str(archive_path)) as tar:
        # The archive was downloaded, so validate every member before anything
        # is written: "../" components, absolute names and links can otherwise
        # make extraction write outside of the destination directory.
        for member in tar.getmembers():
            member_path = os.path.join(root, member.name)
            if _escapes(member_path):
                raise ValueError(
                    "Archive member '{}' would be extracted outside of the "
                    "target directory".format(member.name)
                )

            if member.issym():
                # Symbolic link targets are relative to the link's own directory.
                link_target = os.path.join(os.path.dirname(member_path), member.linkname)
            elif member.islnk():
                # Hard link targets are relative to the archive root.
                link_target = os.path.join(root, member.linkname)
            else:
                continue

            if _escapes(link_target):
                raise ValueError(
                    "Archive link '{}' points outside of the target "
                    "directory".format(member.name)
                )

        # Where the interpreter supports extraction filters, also apply the
        # standard library's 'data' filter as a second line of defense.
        extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}
        tar.extractall(path=destination, **extract_kwargs)