def read_entityset(path, profile_name=None, **kwargs):
    '''Read entityset from disk, S3 path, or URL.

    Args:
        path (str): Directory on disk, S3 path, or URL to read `data_description.json`.
        profile_name (str, bool): The AWS profile specified to write to S3.
            Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords): Additional keyword arguments to pass as keyword
            arguments to the underlying deserialization method.
    '''
    if _is_url(path) or _is_s3(path) or _is_local_tar(str(path)):
        with tempfile.TemporaryDirectory() as tmpdir:
            local_path = path
            transport_params = None
            if _is_s3(path):
                transport_params = get_transport_params(profile_name)
            if _is_s3(path) or _is_url(path):
                # Download the remote archive into the scratch directory first.
                local_path = os.path.join(tmpdir, "temporary_es")
                use_smartopen_es(local_path, path, transport_params)
            with tarfile.open(str(local_path)) as tar:
                # Guard against tar path traversal (CVE-2007-4559): a crafted
                # member name such as "../../evil" would otherwise be written
                # outside of tmpdir by extractall(). Reject any member whose
                # resolved destination escapes the extraction directory.
                root = os.path.realpath(tmpdir)
                for member in tar.getmembers():
                    dest = os.path.realpath(os.path.join(root, member.name))
                    if dest != root and not dest.startswith(root + os.sep):
                        raise ValueError(
                            "Tarfile contains a member with an unsafe path: "
                            "{}".format(member.name))
                tar.extractall(path=tmpdir)
            data_description = read_data_description(tmpdir)
            return description_to_entityset(data_description, **kwargs)
    else:
        data_description = read_data_description(path)
        return description_to_entityset(data_description, **kwargs)
def save(self, location, profile_name):
    """Serialize the features and write them to ``location``.

    If ``location`` is None the JSON payload is returned as a string.
    A string ``location`` is treated as an S3 path or a local file path
    (URLs are rejected); any other value is assumed to be an open,
    writable file-like object.
    """
    features_dict = self.to_dict()

    # No destination given: hand the serialized payload straight back.
    if location is None:
        return json.dumps(features_dict)

    # A non-string destination is treated as a writable file-like object.
    if not isinstance(location, str):
        json.dump(features_dict, location)
        return

    if _is_url(location):
        raise ValueError("Writing to URLs is not supported")

    if not _is_s3(location):
        # Plain local path.
        with open(location, "w") as f:
            json.dump(features_dict, f)
        return

    # S3 destination: choose a transport based on the requested profile.
    boto3 = import_or_raise("boto3", BOTO3_ERR_MSG)
    session = boto3.Session()
    if isinstance(profile_name, str):
        # Explicit named profile: route smart_open through that session.
        transport_params = {'session': boto3.Session(profile_name=profile_name)}
        use_smartopen_features(location, features_dict, transport_params, read=False)
    elif profile_name is False:
        # Anonymous access was requested.
        use_s3fs_features(location, features_dict, read=False)
    elif session.get_credentials() is not None:
        # Default credentials are available.
        use_smartopen_features(location, features_dict, read=False)
    else:
        use_s3fs_features(location, features_dict, read=False)
def write_data_description(entityset, path, profile_name=None, **kwargs):
    """Serialize entityset to data description and write to disk or S3 path.

    Args:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
        path (str) : Location on disk or S3 path to write `data_description.json`
            and dataframe data.
        profile_name (str, bool): The AWS profile specified to write to S3.
            Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords) : Additional keyword arguments to pass as keywords
            arguments to the underlying serialization method or to specify
            AWS profile.
    """
    if _is_s3(path):
        # Serialize into a scratch directory, archive it, then upload.
        with tempfile.TemporaryDirectory() as scratch:
            os.makedirs(os.path.join(scratch, "data"))
            dump_data_description(entityset, scratch, **kwargs)
            archive_path = create_archive(scratch)
            use_smartopen_es(
                archive_path,
                path,
                read=False,
                transport_params=get_transport_params(profile_name),
            )
    elif _is_url(path):
        raise ValueError("Writing to URLs is not supported")
    else:
        # Local destination: write directly in place.
        out_dir = os.path.abspath(path)
        os.makedirs(os.path.join(out_dir, "data"), exist_ok=True)
        dump_data_description(entityset, out_dir, **kwargs)
def write_data_description(entityset, path, profile_name=None, **kwargs):
    '''Serialize entityset to data description and write to disk or S3 path.

    Args:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
        path (str) : Location on disk or S3 path to write `data_description.json`
            and entity data.
        profile_name (str, bool): The AWS profile specified to write to S3.
            Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords) : Additional keyword arguments to pass as keywords
            arguments to the underlying serialization method or to specify
            AWS profile.
    '''
    if _is_s3(path):
        boto3 = import_or_raise("boto3", BOTO3_ERR_MSG)
        with tempfile.TemporaryDirectory() as staging_dir:
            # Serialize into a scratch directory and archive it before upload.
            os.makedirs(os.path.join(staging_dir, 'data'))
            dump_data_description(entityset, staging_dir, **kwargs)
            archive = create_archive(staging_dir)

            session = boto3.Session()
            if isinstance(profile_name, str):
                # Explicit named profile: route smart_open through it.
                params = {'session': boto3.Session(profile_name=profile_name)}
                use_smartopen_es(archive, path, params, read=False)
            elif profile_name is False:
                # Anonymous access was requested.
                use_s3fs_es(archive, path, read=False)
            elif session.get_credentials() is not None:
                # Default credentials are available.
                use_smartopen_es(archive, path, read=False)
            else:
                use_s3fs_es(archive, path, read=False)
    elif _is_url(path):
        raise ValueError("Writing to URLs is not supported")
    else:
        # Local destination: write directly in place.
        out_dir = os.path.abspath(path)
        os.makedirs(os.path.join(out_dir, 'data'), exist_ok=True)
        dump_data_description(entityset, out_dir, **kwargs)
def load(cls, features, profile_name):
    """Build an instance from a JSON string, URL, S3 path, file path, or open file."""
    if not isinstance(features, str):
        # A non-string is assumed to be an open file-like object.
        return cls(json.load(features))

    try:
        # The string may itself be the JSON payload.
        features_dict = json.loads(features)
    except ValueError:
        # Not inline JSON -- interpret the string as a location instead.
        if _is_url(features):
            features_dict = use_smartopen_features(features)
        elif _is_s3(features):
            session = boto3.Session()
            if isinstance(profile_name, str):
                # Explicit named profile: route smart_open through it.
                transport_params = {
                    'session': boto3.Session(profile_name=profile_name)
                }
                features_dict = use_smartopen_features(features, transport_params)
            elif profile_name is False:
                # Anonymous access was requested.
                features_dict = use_s3fs_features(features)
            elif session.get_credentials() is not None:
                features_dict = use_smartopen_features(features)
            else:
                features_dict = use_s3fs_features(features)
        else:
            # Plain local path.
            with open(features, 'r') as f:
                features_dict = json.load(f)
    return cls(features_dict)
def read_entityset(path, profile_name=None, **kwargs):
    '''Read entityset from disk, S3 path, or URL.

    Args:
        path (str): Directory on disk, S3 path, or URL to read `data_description.json`.
        profile_name (str, bool): The AWS profile specified to write to S3.
            Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords): Additional keyword arguments to pass as keyword
            arguments to the underlying deserialization method.
    '''
    if _is_url(path) or _is_s3(path):
        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = Path(path).name
            file_path = os.path.join(tmpdir, file_name)
            transport_params = {}
            session = boto3.Session()
            # Download the archive with a transport matching the profile.
            if _is_url(path):
                use_smartopen_es(file_path, path)
            elif isinstance(profile_name, str):
                transport_params = {
                    'session': boto3.Session(profile_name=profile_name)
                }
                use_smartopen_es(file_path, path, transport_params)
            elif profile_name is False:
                use_s3fs_es(file_path, path)
            elif session.get_credentials() is not None:
                use_smartopen_es(file_path, path)
            else:
                use_s3fs_es(file_path, path)
            with tarfile.open(str(file_path)) as tar:
                # Guard against tar path traversal (CVE-2007-4559): a crafted
                # member name such as "../../evil" would otherwise be written
                # outside of tmpdir by extractall(). Reject any member whose
                # resolved destination escapes the extraction directory.
                root = os.path.realpath(tmpdir)
                for member in tar.getmembers():
                    dest = os.path.realpath(os.path.join(root, member.name))
                    if dest != root and not dest.startswith(root + os.sep):
                        raise ValueError(
                            "Tarfile contains a member with an unsafe path: "
                            "{}".format(member.name))
                tar.extractall(path=tmpdir)
            data_description = read_data_description(tmpdir)
            return description_to_entityset(data_description, **kwargs)
    else:
        data_description = read_data_description(path)
        return description_to_entityset(data_description, **kwargs)
def load(cls, features, profile_name):
    """Build an instance from a JSON string, URL, S3 path, file path, or open file."""
    if not isinstance(features, str):
        # A non-string is assumed to be an open file-like object.
        return cls(json.load(features))

    try:
        # The string may itself be the JSON payload.
        features_dict = json.loads(features)
    except ValueError:
        # Not inline JSON -- interpret the string as a location instead.
        if _is_url(features) or _is_s3(features):
            params = None
            if _is_s3(features):
                params = get_transport_params(profile_name)
            features_dict = use_smartopen_features(
                features, transport_params=params)
        else:
            # Plain local path.
            with open(features, "r") as f:
                features_dict = json.load(f)
    return cls(features_dict)
def save(self, location, profile_name):
    """Serialize the features and write them to ``location``.

    If ``location`` is None the JSON payload is returned as a string.
    A string ``location`` is treated as an S3 path or a local file path
    (URLs are rejected); any other value is assumed to be an open,
    writable file-like object.
    """
    features_dict = self.to_dict()

    # No destination given: hand the serialized payload straight back.
    if location is None:
        return json.dumps(features_dict)

    # A non-string destination is treated as a writable file-like object.
    if not isinstance(location, str):
        json.dump(features_dict, location)
        return

    if _is_url(location):
        raise ValueError("Writing to URLs is not supported")

    if _is_s3(location):
        # S3 destination: transport is derived from the requested profile.
        use_smartopen_features(
            location,
            features_dict,
            get_transport_params(profile_name),
            read=False,
        )
    else:
        # Plain local path.
        with open(location, "w") as f:
            json.dump(features_dict, f)