def copy(self, **kwargs):
    """Return a deep copy of this object that carries its own identity.

    The clone is produced with ``copy.deepcopy``. Afterwards its metadata
    is updated: the uuid of the source object is stored as parent, a
    value from ``self.gen_uuid()`` is stored as uuid and the modification
    time is set to ``now_utc_str()``.

    .. code-block:: python

        data = sdata.Data(name="data",
                          uuid="38b26864e7794f5182d38459bab85842",
                          description="this is remarkable")
        datac = data.copy()
        print("data  {0.uuid}".format(data))
        print("datac {0.uuid}".format(datac))
        parent = datac.metadata.get(datac.SDATA_PARENT)
        print("parent {0.value}".format(parent))

    .. code-block::

        data  38b26864e7794f5182d38459bab85842
        datac 2c4eb15900af435d8cd9c8573ca777e2
        parent 38b26864e7794f5182d38459bab85842

    :param uuid: optional keyword; when present it is assigned to the
        ``uuid`` attribute of the copy
    :param name: optional keyword; when present it is assigned to the
        ``name`` attribute of the copy
    :return: Data
    """
    clone = copy.deepcopy(self)

    # give the clone its own identity and remember where it came from
    clone.metadata.add(self.SDATA_PARENT, self.uuid)
    clone.metadata.add(self.SDATA_UUID, self.gen_uuid())
    clone.metadata.add(self.SDATA_MTIME, now_utc_str(), dtype="str")

    # explicit overrides win over the generated values (uuid first, then name)
    for override in ("uuid", "name"):
        if override in kwargs:
            setattr(clone, override, kwargs.get(override))

    logger.debug(f"make copy of {self.uuid} -> {clone.uuid}")
    return clone
def initialize(self, **kwargs):
    """Write an initial ``metadata`` node into the HDF store at ``self.rootpath``.

    A new ``Metadata`` object with a single ``ctime`` attribute (the value
    returned by ``now_utc_str()``) is created and its ``df`` is stored
    under the key ``metadata`` in fixed format.

    :param kwargs: forwarded unchanged to ``pd.HDFStore``
    :return: None
    """
    logger.info(f"initialize {self.rootpath}")

    initial_meta = Metadata()
    initial_meta.add("ctime", now_utc_str())

    store = pd.HDFStore(self.rootpath, **kwargs)
    with store:
        store.put(
            "metadata",
            initial_meta.df,
            format="fixed",
            data_columns=True,
        )
def __init__(self, **kwargs):
    """create Data object

    All arguments are read from ``kwargs``. Default sdata attributes are
    written to a fresh ``Metadata`` first; attributes of a given
    ``metadata`` object are added afterwards, then the remaining keyword
    arguments are applied and finally the uuid is resolved.

    .. code-block:: python

        df = pd.DataFrame([1,2,3])
        data = sdata.Data(name='my name',
                          uuid='38b26864e7794f5182d38459bab85842',
                          table=df,
                          description="A remarkable description")

    :param name: name of the data object (only assigned when not ``None``)
    :param table: pandas.DataFrame to store
    :param uuid: uuid of the object; the special value ``"hash"`` derives
        the uuid from ``self.gen_uuid_from_state()``
    :param metadata: sdata.Metadata object whose attributes are added to
        the default attributes
    :param description: a string to describe the object (default ``""``)
    :param prefix: assigned to ``self.prefix`` (default ``""``)
    :param project: assigned to ``self.project`` (default ``""``)
    :param auto_correct: ``True`` when omitted, ``None`` or ``True``;
        any other value disables it
    :param default_attributes: passed to ``self._gen_default_attributes``;
        falls back to ``self.ATTR_NAMES`` when missing or falsy
    :raises Sdata_Uuid_Exeption: re-raised when the given uuid is rejected
        by ``self._set_uuid`` and ``auto_correct`` is disabled
    """
    # self._uuid = None
    # self._name = None
    self._prefix = None

    # ToDo: add getter and setter for metadata
    # self.metadata = kwargs.get("metadata") or Metadata()
    self.metadata = Metadata()

    # set default sdata attributes
    self.metadata.add(self.SDATA_VERSION, __version__, dtype="str", description="sdata package version")
    self.metadata.add(self.SDATA_NAME, "N.N.", dtype="str", description="name of the data object")
    self.metadata.add(self.SDATA_UUID, "", dtype="str", description="Universally Unique Identifier")
    self.metadata.add(self.SDATA_PARENT, "", dtype="str", description="uuid of the parent sdata object")
    self.metadata.add(self.SDATA_CLASS, self.__class__.__name__, dtype="str", description="sdata class")
    self.metadata.add(self.SDATA_CTIME, now_utc_str(), dtype="str", description="creation date")
    self.metadata.add(self.SDATA_MTIME, now_utc_str(), dtype="str", description="modification date")

    # Add the attributes of a user supplied Metadata object after the
    # defaults. Anything that is falsy or not a Metadata instance is
    # silently ignored.
    metadata = kwargs.get("metadata")
    if metadata is not None:
        if metadata and isinstance(metadata, Metadata):
            for attribute in metadata.attributes.values():
                # presumably replaces a default attribute of the same name
                # -- confirm against Metadata.add
                self.metadata.add(attribute)

    # auto correct: enabled unless the caller passes a value other than
    # None / True
    if kwargs.get("auto_correct") is None or kwargs.get(
            "auto_correct") is True:
        self.auto_correct = True
    else:
        self.auto_correct = False

    # a name of None keeps whatever name the metadata currently holds
    if kwargs.get("name") is not None:
        self.name = kwargs.get("name")
    self.prefix = kwargs.get("prefix") or ""

    self._gen_default_attributes(
        kwargs.get("default_attributes") or self.ATTR_NAMES)
    self._group = OrderedDict()
    self._table = None  # pd.DataFrame()
    self.table = kwargs.get("table", None)
    self._description = ""
    self.description = kwargs.get("description", "")
    self.project = kwargs.get("project", "")

    # uuid resolution, evaluated in this order:
    #   1. a uuid keyword is given (not None, not "hash") and the metadata
    #      uuid value is still falsy -> use the keyword
    #   2. no uuid keyword ("" or None) but the metadata uuid is not ""
    #      -> keep the metadata uuid
    #   3. uuid == "hash" -> derive the uuid from the object state
    #   4. otherwise -> generate a random uuid
    # NOTE(review): case 4 is also reached when a uuid keyword is given
    # AND the metadata already carries a uuid; both are then discarded in
    # favour of a random one -- confirm this is intended.
    if (kwargs.get("uuid") == "" or kwargs.get("uuid") is not None) and not self.metadata.get(
            self.SDATA_UUID).value and kwargs.get("uuid") != "hash":
        # uuid taken from kwargs
        try:
            self._set_uuid(kwargs.get(
                "uuid"))  # store given uuid str or generate a new uuid
        except Sdata_Uuid_Exeption as exp:
            # with auto_correct a rejected uuid is replaced by a random
            # one, otherwise the error is propagated to the caller
            if self.auto_correct is True:
                logger.warning("got invalid uuid -> generate a new uuid")
                self._set_uuid(uuid.uuid4())
            else:
                raise
    elif (kwargs.get("uuid") == "" or kwargs.get("uuid") is None
          ) and self.metadata.get(self.SDATA_UUID).value != "":
        # uuid already present in the metadata: nothing to do
        pass
    elif kwargs.get("uuid") == "hash":
        # uuid derived from the value returned by gen_uuid_from_state()
        sha3_256 = self.gen_uuid_from_state()
        new_uuid = uuid_from_str(sha3_256)
        self._set_uuid(new_uuid.hex)
    else:
        # new random uuid
        self._set_uuid(uuid.uuid4())
def update_mtime(self):
    """Refresh the modification-time attribute in the metadata.

    Adds ``self.SDATA_MTIME`` to ``self.metadata`` with the value returned
    by ``now_utc_str()``. ``dtype="str"`` is passed explicitly so that this
    write agrees with the other places in this file that store the mtime
    attribute (the constructor and ``copy``).

    :return: None
    """
    self.metadata.add(self.SDATA_MTIME, now_utc_str(), dtype="str")
def to_html(self, filepath, xlsx=True, style=None):
    """Export the object as a standalone HTML page.

    The page shows the title (``self.osname`` and ``self.uuid``), an
    optional download link, the description, the ``head()`` of the
    metadata table, the ``head()`` of the data table and the
    ``describe()`` summary of the data table.

    The file is written as UTF-8 so that it matches the
    ``<meta charset="utf-8">`` declaration of the page regardless of the
    platform default encoding.

    :param filepath: path of the html file to write
    :param xlsx: if ``True`` the result of ``self.get_download_link()`` is
        embedded in the "Download" section, otherwise the section is empty
    :param style: currently unused
    :return: None
    :raises OSError: if ``filepath`` cannot be opened for writing
    """
    frame = self.df
    summary = frame.describe()  # computed once, used for values and headers
    metadata_frame = self.metadata.df

    # identity check on purpose: only a real ``True`` enables the link
    xlsx_tag = self.get_download_link() if xlsx is True else ""

    # NOTE(review): title and description are inserted unescaped; confirm
    # whether they may contain markup before adding html escaping.
    param = {
        "title": "{0} [{1}]".format(self.osname, self.uuid),
        "description": self.description,
        "metadata": tabulate(metadata_frame.head().values,
                             metadata_frame.columns,
                             tablefmt="html"),
        "table": tabulate(frame.head(), frame.columns, tablefmt="html"),
        "table_description": tabulate(summary, summary.columns,
                                      tablefmt="html"),
        "xlsx_tag": xlsx_tag,
        "sdata": "created with sdata v{}.".format(__version__),
        "now": "{}".format(now_utc_str()),
    }

    # literal braces of the css rules are doubled for str.format
    page = """<!DOCTYPE html>
<html lang="de">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{title}</title>
    <style>
    h1 {{
      background-color: #00FFFF77;
      color: black;
    }}
    h2 {{
      background-color: #00FFFF44;
      color: black;
    }}
    h3 {{
      background-color: #00FFFF11;
      color: black;
    }}
    p {{
      color: black;
    }}
    table, th, td, caption {{
      border: 1px solid #a0a0a0;
    }}
    table {{
      border-collapse: collapse;
      border-spacing: 1em;
      border-width: thin 0 0 thin;
      margin: 0 0 1em;
      table-layout: auto;
      max-width: 100%;
      text-align: right;
    }}
    th, td {{
      font-weight: normal;
      text-align: left;
      border-spacing: 1em;
      padding: .1em .3em;
    }}
    th, caption {{
      background-color: #f1f3f4;
      font-weight: 700;
    }}
    </style>
  </head>
  <body>
    <h1>{title}</h1>
    <h2>Download</h2>
    <p>{xlsx_tag}</p>
    <h2>Description</h2>
    <p>{description}</p>
    <h2>Metadata</h2>
    {metadata}
    <h2>Table</h2>
    {table}
    <h3>Table Description</h3>
    {table_description}
    <p>{sdata}</p>
    <p>{now}</p>
  </body>
</html>""".format(**param)

    # errors while writing propagate unchanged to the caller
    with open(filepath, "w", encoding="utf-8") as fh:
        fh.write(page)