def to_csv(self, path, sep=',', encoding='utf-8', engine='python', compression=None, profile_name=None):
    '''Write entityset to disk as CSV files, location specified by `path`.

    The path may be a local path or an S3 path. When writing to S3,
    a tar archive of the files is written.

    Args:
        path (str) : Location on disk to write to (will be created as a directory)
        sep (str) : String of length 1. Field delimiter for the output file.
        encoding (str) : A string representing the encoding to use in the output file, defaults to 'utf-8'.
        engine (str) : Name of the engine to use. Possible values are: {'c', 'python'}.
        compression (str) : Name of the compression to use. Possible values are: {'gzip', 'bz2', 'zip', 'xz', None}.
        profile_name (str) : Name of AWS profile to use, False to use an anonymous profile, or None.
    '''
    # Collect the CSV-specific options and forward them to the serializer.
    csv_options = {
        'index': False,
        'sep': sep,
        'encoding': encoding,
        'engine': engine,
        'compression': compression,
        'profile_name': profile_name,
    }
    serialize.write_data_description(self, path, format='csv', **csv_options)
    return self
def to_pickle(self, path, compression=None):
    '''Write entityset to disk in the pickle format, location specified by `path`.

    Args:
        path (str): location on disk to write to (will be created as a directory)
        compression (str) : Name of the compression to use. Possible values are: {'gzip', 'bz2', 'zip', 'xz', None}.
    '''
    # Delegate to the shared serializer; the fluent return lets callers chain.
    serialize.write_data_description(
        self,
        path,
        format='pickle',
        compression=compression,
    )
    return self
def test_serialize_subdirs_not_removed(es, tmpdir):
    # Serializing into a directory must not wipe out pre-existing
    # subdirectories, and must overwrite a stale description file.
    write_path = tmpdir.mkdir("test")
    test_dir = write_path.mkdir("test_dir")
    description_file = str(write_path.join('data_description.json'))
    with open(description_file, 'w') as f:
        json.dump('__SAMPLE_TEXT__', f)
    serialize.write_data_description(es, path=str(write_path), index='1',
                                     sep='\t', encoding='utf-8',
                                     compression=None)
    # The unrelated subdirectory survived serialization.
    assert os.path.exists(str(test_dir))
    # The placeholder description was replaced with real content.
    with open(description_file, 'r') as f:
        assert '__SAMPLE_TEXT__' not in json.load(f)
def to_parquet(self, path, engine='auto', compression=None):
    '''Write entityset to disk in the parquet format, location specified by `path`.

    Args:
        path (str): location on disk to write to (will be created as a directory)
        engine (str) : Name of the engine to use. Possible values are: {'auto', 'pyarrow', 'fastparquet'}.
        compression (str) : Name of the compression to use. Possible values are: {'snappy', 'gzip', 'brotli', None}.
    '''
    # Hand the parquet-specific options off to the shared serializer.
    parquet_options = {'engine': engine, 'compression': compression}
    serialize.write_data_description(self, path, format='parquet', **parquet_options)
    return self
def to_pickle(self, path, compression=None, profile_name=None):
    '''Write entityset in the pickle format, location specified by `path`.

    The path may be a local path or an S3 path. When writing to S3,
    a tar archive of the files is written.

    Args:
        path (str): location on disk to write to (will be created as a directory)
        compression (str) : Name of the compression to use. Possible values are: {'gzip', 'bz2', 'zip', 'xz', None}.
        profile_name (str) : Name of AWS profile to use, False to use an anonymous profile, or None.
    '''
    # Delegate to the shared serializer; returning self keeps the API fluent.
    serialize.write_data_description(
        self,
        path,
        format='pickle',
        compression=compression,
        profile_name=profile_name,
    )
    return self
def to_csv(self, path, sep=',', encoding='utf-8', engine='python', compression=None):
    '''Write entityset to disk in the csv format, location specified by `path`.

    Args:
        path (str) : Location on disk to write to (will be created as a directory)
        sep (str) : String of length 1. Field delimiter for the output file.
        encoding (str) : A string representing the encoding to use in the output file, defaults to 'utf-8'.
        engine (str) : Name of the engine to use. Possible values are: {'c', 'python'}.
        compression (str) : Name of the compression to use. Possible values are: {'gzip', 'bz2', 'zip', 'xz', None}.
    '''
    # Forward the CSV-specific writer options to the shared serializer.
    writer_options = dict(
        index=False,
        sep=sep,
        encoding=encoding,
        engine=engine,
        compression=compression,
    )
    serialize.write_data_description(self, path, format='csv', **writer_options)
    return self
def to_parquet(self, path, engine='auto', compression=None, profile_name=None):
    '''Write entityset to disk in the parquet format, location specified by `path`.

    The path may be a local path or an S3 path. When writing to S3,
    a tar archive of the files is written.

    Args:
        path (str): location on disk to write to (will be created as a directory)
        engine (str) : Name of the engine to use. Possible values are: {'auto', 'pyarrow', 'fastparquet'}.
        compression (str) : Name of the compression to use. Possible values are: {'snappy', 'gzip', 'brotli', None}.
        profile_name (str) : Name of AWS profile to use, False to use an anonymous profile, or None.
    '''
    # Collect the parquet writer options and delegate to the serializer.
    parquet_options = {
        'engine': engine,
        'compression': compression,
        'profile_name': profile_name,
    }
    serialize.write_data_description(self, path, format='parquet', **parquet_options)
    return self
def test_serialize_subdirs_not_removed(es, tmpdir):
    # Serializing into a directory must not wipe out pre-existing
    # subdirectories, and must overwrite a stale description file.
    write_path = tmpdir.mkdir("test")
    test_dir = write_path.mkdir("test_dir")
    description_file = str(write_path.join('data_description.json'))
    with open(description_file, 'w') as f:
        json.dump('__SAMPLE_TEXT__', f)
    # Koalas-backed entities require the 'none' string rather than None.
    uses_koalas = ks and any(isinstance(e.df, ks.DataFrame) for e in es.entities)
    compression = 'none' if uses_koalas else None
    serialize.write_data_description(es, path=str(write_path), index='1',
                                     sep='\t', encoding='utf-8',
                                     compression=compression)
    # The unrelated subdirectory survived serialization.
    assert os.path.exists(str(test_dir))
    # The placeholder description was replaced with real content.
    with open(description_file, 'r') as f:
        assert '__SAMPLE_TEXT__' not in json.load(f)
def test_serialize_invalid_formats(es, tmpdir):
    # An unknown format string must raise a ValueError naming the
    # supported formats.
    expected_message = 'must be one of the following formats: {}'.format(
        ', '.join(serialize.FORMATS))
    with pytest.raises(ValueError, match=expected_message):
        serialize.write_data_description(es, path=str(tmpdir), format='')