def as_bytes(self, data, extension=None):
    """Serialize ``data`` to bytes as JSON or as a small HTML document.

    Args:
        data: Object to serialize. It is passed to ``json.dumps`` except
            when the HTML format is requested and ``data`` is a ``str``,
            in which case the string is emitted unchanged.
        extension: File extension selecting the format (``json``, ``html``
            or ``htm``). ``None`` selects ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)`` where ``payload`` is UTF-8 encoded.

    Raises:
        Exception: If the extension is not one of the supported ones.
    """
    # Local import: the file's import block is not visible from this block.
    from html import escape

    if extension is None:
        extension = self.default_extension()

    if extension == "json":
        return json.dumps(data).encode("utf-8"), self.default_mimetype()

    if extension in ("html", "htm"):
        if isinstance(data, str):
            # Strings are emitted unchanged.
            return data.encode("utf-8"), mimetype_from_extension("html")
        # Escape "<", ">" and "&" so that JSON text containing markup is
        # displayed literally instead of being interpreted as HTML. Quotes
        # are left alone; they need no escaping inside element content.
        rendered = escape(json.dumps(data), quote=False)
        return (
            f"<pre>{rendered}</pre>".encode("utf-8"),
            mimetype_from_extension("html"),
        )

    raise Exception(f"Unsupported file extension: {extension}")
def with_filename(self, filename):
    """Store ``filename`` in the metadata and derive the extension from it.

    When the name contains a dot, the lower-cased text after the last dot
    becomes ``self.extension`` and the mime type looked up for it is stored
    under ``metadata["mimetype"]``. A name without a dot only updates
    ``metadata["filename"]``.

    Returns:
        ``self``, so that calls can be chained.
    """
    self.metadata["filename"] = filename
    if "." not in filename:
        return self

    last_part = filename.split(".")[-1]
    self.extension = last_part.lower()
    self.metadata["mimetype"] = mimetype_from_extension(self.extension)
    return self
def as_bytes(self, data, extension=None):
    """Serialize a model object to bytes in the requested format.

    Args:
        data: Object to serialize; it must satisfy ``self.is_type_of`` and
            provide ``to_json()``, ``to_yaml()`` and ``save(path)``.
        extension: ``json``, ``yaml``, ``h5`` or ``hdf5``. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)

    if extension == "json":
        return data.to_json().encode("utf-8"), mimetype
    if extension == "yaml":
        return data.to_yaml().encode("utf-8"), mimetype
    if extension in ("h5", "hdf5"):
        # HACK - data.save needs a file name, and the NamedTemporaryFile
        # implementation does not work on Windows, so mkstemp is used and
        # the descriptor is closed immediately.
        handle, name = mkstemp(prefix="keras_model_", suffix="." + extension)
        os.close(handle)
        try:
            data.save(name)
            with open(name, "rb") as stream:
                payload = stream.read()
        finally:
            # Remove the temporary file even when saving or reading fails.
            os.remove(name)
        return payload, mimetype

    raise Exception(
        f"Serialization: file extension {extension} is not supported by kerasmodel type."
    )
def as_bytes(self, data, extension=None):
    """Encode text ``data`` as UTF-8 and report its mime type.

    Args:
        data: Object with an ``encode`` method (a string).
        extension: File extension used to look up the mime type, with
            ``"text/plain"`` passed as the second lookup argument. ``None``
            selects ``self.default_mimetype()`` instead.

    Returns:
        tuple: ``(payload, mimetype)``.
    """
    if extension is not None:
        mime = mimetype_from_extension(extension, "text/plain")
        return data.encode("utf-8"), mime

    # The default extension is still resolved here, although only the
    # default mime type ends up in the result.
    extension = self.default_extension()
    mime = self.default_mimetype()
    return data.encode("utf-8"), mime
def as_bytes(self, data, extension=None):
    """Serialize a DataFusion data frame to parquet, csv or feather bytes.

    The collected record batches are turned into a ``pyarrow.Table``,
    written to a file inside a temporary directory and read back as bytes.

    Args:
        data: Object satisfying ``self.is_type_of`` and providing
            ``collect()``.
        extension: ``parquet``, ``csv`` or ``feather``. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)

    # Lambdas keep the pyarrow submodule lookups lazy: only the writer that
    # is actually selected gets resolved, and only at write time.
    writers = {
        "parquet": lambda table, target: pyarrow.parquet.write_table(table, target),
        "csv": lambda table, target: pyarrow.csv.write_csv(table, target),
        "feather": lambda table, target: pyarrow.feather.write_feather(table, target),
    }
    write = writers.get(extension)
    if write is None:
        raise Exception(
            f"Serialization: file extension {extension} is not supported by DataFusion data-frame type."
        )

    with TemporaryDirectory() as scratch:
        target = Path(scratch) / f"data.{extension}"
        table = pyarrow.Table.from_batches(data.collect())
        write(table, str(target))
        payload = target.read_bytes()
    return payload, mimetype
def as_bytes(self, data, extension=None):
    """Serialize a dictionary as ``djson`` or plain ``json`` bytes.

    The ``djson`` layout puts one entry per line: the quoted key followed
    by a colon, left-justified in a 20-character column, then the value as
    produced by ``self.encode_element``.

    Args:
        data: Mapping to serialize. For ``djson`` every key must be a
            ``str``.
        extension: ``djson`` or ``json``. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)`` where ``payload`` is UTF-8 encoded.

    Raises:
        AssertionError: If a ``djson`` key is not a string.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()

    if extension == "djson":
        rows = []
        for key, value in data.items():
            assert isinstance(key, str)
            # json.dumps escapes quotes, backslashes and control characters
            # in the key, which plain quoting did not. ensure_ascii=False
            # keeps non-ASCII keys as they were written before.
            label = json.dumps(key, ensure_ascii=False) + ":"
            rows.append(f"{label:<20}{self.encode_element(value)!s}")
        text = "{\n" + ",\n".join(rows) + "\n}"
        return text.encode("utf-8"), mimetype_from_extension("djson")

    if extension == "json":
        return json.dumps(data).encode("utf-8"), mimetype_from_extension("json")

    raise Exception(f"Unsupported file extension: {extension}")
def metadata(self):
    """Build the metadata dictionary for the current state of this object.

    The result starts from ``self._metadata.as_dict()`` and is overlaid
    with values held on this object. Missing pieces are filled in:

    * title: when ``self.title`` is ``None``, the file name parsed from
      ``self.raw_query``, or ``""`` when there is no query or file name.
    * mimetype: when ``self.mimetype`` is ``None`` and a query is present,
      the mime type for the query extension, or
      ``"application/octet-stream"`` when the query has no extension.
    * message: when empty, the message of the last ``log`` entry, then the
      message of the last ``child_log`` entry.

    Returns:
        dict: The merged metadata.
    """
    merged = self._metadata.as_dict()

    title = self.title
    description = self.description
    if title is None:
        if self.raw_query is None:
            title = ""
        else:
            # The title is known to be None on this path, so the parsed
            # file name (or "") is always taken.
            title = parse(self.raw_query).filename() or ""

    mimetype = self.mimetype
    if mimetype is None and self.query is not None:
        if self.query.extension() is None:
            mimetype = "application/octet-stream"
        else:
            mimetype = mimetype_from_extension(self.query.extension())

    message = self._metadata.message
    # Fall back to the newest entry of the own log, then of the child log.
    for log_key in ("log", "child_log"):
        if message in (None, ""):
            entries = self._metadata.get(log_key, [])
            if len(entries):
                message = entries[-1]["message"]

    merged.update(
        {
            "status": self.status.value,
            "title": title,
            "description": description,
            "mimetype": mimetype,
            "query": self.raw_query,
            "parent_query": self.parent_query,
            "argument_queries": self.argument_queries,
            # log=self.log[:],
            "is_error": self.is_error,
            "direct_subqueries": self.direct_subqueries[:],
            "progress_indicators": self.progress_indicators[:],
            "child_progress_indicators": self.child_progress_indicators[:],
            "child_log": self.child_log,
            "message": message,
            "started": self.started,
            "updated": self.now(),
            "created": self.created,
            "caching": self.caching,
            "vars": dict(self.vars),
            "html_preview": self.html_preview,
            "side_effect": False,
        }
    )
    return merged
def as_bytes(self, data, extension=None):
    """Serialize a data frame to bytes in the format named by ``extension``.

    Supported extensions: ``csv``, ``tsv``, ``json``, ``html``/``htm``,
    ``pkl``/``pickle``, ``parquet``, ``feather``, ``xlsx`` and ``msgpack``.
    Text formats are written without the index and encoded as UTF-8.

    Args:
        data: Object satisfying ``self.is_type_of`` and providing the
            ``to_*`` writer methods used below.
        extension: Target format. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)

    def text_payload(write, **options):
        # Text writers: write into a StringIO without the index.
        buffer = StringIO()
        write(buffer, index=False, **options)
        return buffer.getvalue().encode("utf-8"), mimetype

    def resilient_payload(write, **options):
        # Binary writers that go through ResilientBytesIO: the content is
        # read out first and the buffer is closed with really_close().
        buffer = ResilientBytesIO()
        write(buffer, **options)
        raw = buffer.getvalue()
        buffer.really_close()
        return raw, mimetype

    if extension == "csv":
        return text_payload(data.to_csv)
    if extension == "tsv":
        return text_payload(data.to_csv, sep="\t")
    if extension == "json":
        return text_payload(data.to_json, orient="table")
    if extension in ("html", "htm"):
        return text_payload(data.to_html)
    if extension in ("pkl", "pickle"):
        return resilient_payload(data.to_pickle, compression=None)
    if extension == "parquet":
        return resilient_payload(data.to_parquet, engine="pyarrow")
    if extension == "feather":
        return resilient_payload(data.to_feather)
    if extension == "xlsx":
        workbook = BytesIO()
        excel_writer = pd.ExcelWriter(workbook, engine="xlsxwriter")
        data.to_excel(excel_writer)
        excel_writer.close()
        return workbook.getvalue(), mimetype
    if extension == "msgpack":
        # NOTE(review): DataFrame.to_msgpack was removed in pandas 1.0, so
        # this branch presumably only works with older pandas - confirm.
        packed = BytesIO()
        data.to_msgpack(packed)
        return packed.getvalue(), mimetype

    raise Exception(
        f"Serialization: file extension {extension} is not supported by dataframe type."
    )
def as_bytes(self, data, extension=None):
    """Serialize ``data.to_dict()`` as UTF-8 encoded JSON.

    Args:
        data: Object satisfying ``self.is_type_of`` and providing
            ``to_dict()``.
        extension: ``idf`` or ``json``; both produce JSON with the JSON
            mime type. ``None`` selects ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)

    if extension not in ["idf", "json"]:
        raise Exception(
            f"Serialization: file extension {extension} is not supported by stored dataframe iterator type."
        )

    mimetype = mimetype_from_extension("json")
    as_dictionary = data.to_dict()
    return json.dumps(as_dictionary).encode("utf-8"), mimetype
def as_bytes(self, data, extension=None):
    """Serialize a workbook to ``xlsx`` or ``xltx`` bytes.

    The workbook is saved to a file inside a temporary directory and the
    file content is returned.

    Args:
        data: Object satisfying ``self.is_type_of`` and providing
            ``save(path)``.
        extension: ``xlsx`` or ``xltx``. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)

    if extension not in ("xlsx", "xltx"):
        raise Exception(
            f"Serialization: file extension {extension} is not supported by openpyxl_workbook type."
        )

    with TemporaryDirectory() as scratch:
        target = Path(scratch) / f"data.{extension}"
        data.save(str(target))
        payload = target.read_bytes()
    return payload, mimetype
def as_bytes(self, data, extension=None):
    """Serialize a figure either as a pickle or as a rendered image.

    Args:
        data: Object satisfying ``self.is_type_of``; image formats call
            its ``savefig`` method with ``dpi=300``.
        extension: ``pkl``/``pickle`` for a pickle, or one of ``png``,
            ``svg``, ``pdf``, ``ps``, ``eps``, ``svgz`` for a rendered
            figure. ``None`` selects ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)

    is_pickle = extension in ("pkl", "pickle")
    is_image = extension in ("png", "svg", "pdf", "ps", "eps", "svgz")
    if not (is_pickle or is_image):
        raise Exception(
            f"Serialization: file extension {extension} is not supported by Matplotlib Figure type."
        )

    buffer = BytesIO()
    if is_pickle:
        pickle.dump(data, buffer)
    else:
        # The extension doubles as the savefig format name.
        data.savefig(buffer, dpi=300, format=extension)
    return buffer.getvalue(), mimetype
def as_bytes(self, data, extension=None):
    """Serialize a dataset as UTF-8 encoded CSV or JSON text.

    The chunks yielded by ``data.gen_csv`` or ``data.gen_json`` (both
    called with headers and tags enabled) are concatenated and encoded.

    Args:
        data: Object satisfying ``self.is_type_of``.
        extension: ``csv`` or ``json``. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)

    if extension == "csv":
        chunks = data.gen_csv(show_headers=True, show_tags=True)
    elif extension == "json":
        chunks = data.gen_json(show_headers=True, show_tags=True)
    else:
        raise Exception(
            f"Serialization: file extension {extension} is not supported by HXL dataset type."
        )

    text = "".join(chunks)
    return text.encode("utf-8"), mimetype
def as_bytes(self, data, extension=None):
    """Serialize a polars data frame to CSV or parquet bytes.

    Args:
        data: Object satisfying ``self.is_type_of``.
        extension: ``csv`` or ``parquet``. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)

    if extension not in ("csv", "parquet"):
        raise Exception(
            f"Serialization: file extension {extension} is not supported by polars data-frame type."
        )

    # polars renamed DataFrame.to_csv / to_parquet to write_csv /
    # write_parquet. Prefer the current name and fall back to the old one
    # so that both older and newer polars releases work.
    writer = getattr(data, f"write_{extension}", None)
    if writer is None:
        writer = getattr(data, f"to_{extension}")

    output = BytesIO()
    writer(output)
    return output.getvalue(), mimetype
def as_bytes(self, data, extension=None):
    """Serialize an image to bytes in the format matching ``extension``.

    ``self.format_from_extension`` supplies the format name together with
    flags telling whether the format can be read and written.

    Args:
        data: Object satisfying ``self.is_type_of`` and providing
            ``save(file, format=...)``.
        extension: Target file extension. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the format is read-only or not supported at all.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)
    format_name, can_read, can_write = self.format_from_extension(extension)

    if can_write:
        buffer = BytesIO()
        data.save(buffer, format=format_name)
        return buffer.getvalue(), mimetype

    if can_read:
        raise Exception(
            f"Serialization: PIL Image only supports reading, but not writing for file extension {extension}."
        )
    raise Exception(
        f"Serialization: file extension {extension} is not supported by PIL Image."
    )
def as_bytes(self, data, extension=None):
    """Serialize a model object to JSON, YAML or HDF5 bytes.

    JSON and YAML come from ``data.to_json()`` / ``data.to_yaml()``. HDF5
    is produced by saving into a temporary directory and reading the file
    back.

    Args:
        data: Object satisfying ``self.is_type_of``.
        extension: ``json``, ``yaml``, ``h5`` or ``hdf5``. ``None`` selects
            ``self.default_extension()``.

    Returns:
        tuple: ``(payload, mimetype)``.

    Raises:
        AssertionError: If ``self.is_type_of(data)`` is false.
        Exception: If the extension is not supported.
    """
    if extension is None:
        extension = self.default_extension()
    assert self.is_type_of(data)
    mimetype = mimetype_from_extension(extension)

    if extension in ("json", "yaml"):
        buffer = StringIO()
        buffer.write(data.to_json() if extension == "json" else data.to_yaml())
        return buffer.getvalue().encode("utf-8"), mimetype

    if extension in ("h5", "hdf5"):
        with TemporaryDirectory() as scratch:
            model_file = Path(scratch) / f"data.{extension}"
            data.save(str(model_file))
            payload = model_file.read_bytes()
        return payload, mimetype

    raise Exception(
        f"Serialization: file extension {extension} is not supported by kerasmodel type."
    )
def as_bytes(self, data, extension=None):
    """Return ``data`` unchanged together with the mime type for ``extension``.

    Args:
        data: Payload, passed through as is.
        extension: File extension handed to ``mimetype_from_extension``;
            ``None`` is forwarded without substituting a default.

    Returns:
        tuple: ``(data, mimetype)``.
    """
    mimetype = mimetype_from_extension(extension)
    return data, mimetype
def mimetype(self):
    """Return the mime type of the data.

    The value stored under ``metadata["mimetype"]`` is returned when the
    key is present (even if that value is ``None``). Otherwise the mime
    type is derived from ``self.extension``.
    """
    # A sentinel distinguishes "key absent" from a stored None, so the
    # extension lookup runs only when it is actually needed.
    missing = object()
    stored = self.metadata.get("mimetype", missing)
    if stored is not missing:
        return stored
    return mimetype_from_extension(self.extension)