Пример #1
0
    def copy(self, **kwargs):
        """Return a deep copy of this Data object that carries its own identity.

        The clone's metadata is updated so that ``SDATA_PARENT`` holds the
        uuid of the original, ``SDATA_UUID`` holds a freshly generated uuid
        and ``SDATA_MTIME`` holds the current time string.

        .. code-block:: python

            data = sdata.Data(name="data", uuid="38b26864e7794f5182d38459bab85842", description="this is remarkable")
            datac = data.copy()
            print("data  {0.uuid}".format(data))
            print("datac {0.uuid}".format(datac))
            print("parent {0.value}".format(datac.metadata.get(datac.SDATA_PARENT)))

        .. code-block::

            data  38b26864e7794f5182d38459bab85842
            datac 2c4eb15900af435d8cd9c8573ca777e2
            parent 38b26864e7794f5182d38459bab85842

        :param uuid: optional, assigned to the clone's ``uuid`` attribute
            (replaces the generated one)
        :param name: optional, assigned to the clone's ``name`` attribute
        :return: Data
        """
        clone = copy.deepcopy(self)

        # Re-stamp the identity attributes: remember where the clone came
        # from, give it its own uuid and mark it as just modified.
        clone.metadata.add(self.SDATA_PARENT, self.uuid)
        clone.metadata.add(self.SDATA_UUID, self.gen_uuid())
        clone.metadata.add(self.SDATA_MTIME, now_utc_str(), dtype="str")

        # Explicit overrides from the caller win over the generated values;
        # uuid is applied before name, as attribute assignments.
        for field in ("uuid", "name"):
            if field in kwargs:
                setattr(clone, field, kwargs[field])

        logger.debug(f"make copy of {self.uuid} -> {clone.uuid}")
        return clone
Пример #2
0
 def initialize(self, **kwargs):
     """Write an initial ``metadata`` entry into the HDF store at ``self.rootpath``.

     A new Metadata object holding a single ``ctime`` attribute (the current
     time string) is created and its ``df`` is stored under the key
     ``metadata`` in fixed format.

     :param kwargs: forwarded unchanged to ``pd.HDFStore``
     :return: None
     """
     logger.info(f"initialize {self.rootpath}")

     meta = Metadata()
     meta.add("ctime", now_utc_str())

     with pd.HDFStore(self.rootpath, **kwargs) as store:
         store.put("metadata", meta.df, format="fixed", data_columns=True)
Пример #3
0
    def __init__(self, **kwargs):
        """Create a Data object.

        The constructor first fills ``self.metadata`` with the default sdata
        attributes (version, name, uuid, parent, class, ctime, mtime), then
        adds the attributes of an optionally passed Metadata object, applies
        the remaining keyword arguments and finally resolves the uuid.

        .. code-block:: python

            df = pd.DataFrame([1,2,3])
            data = sdata.Data(name='my name',
                        uuid='38b26864e7794f5182d38459bab85842',
                        table=df,
                        description="A remarkable description")

        :param name: name of the data object; only applied when not None
        :param table: pandas.DataFrame to store (default None)
        :param uuid: uuid of the object. The special value ``"hash"`` derives
            the uuid from ``self.gen_uuid_from_state()``. See the inline
            comments at the end of this method for the full resolution order.
        :param metadata: sdata.Metadata object; when it is a truthy Metadata
            instance each of its attributes is added to ``self.metadata``
            after the defaults
        :param description: a string to describe the object (default "")
        :param auto_correct: ``None`` (not given) or ``True`` enables auto
            correction, any other value disables it. When enabled, a uuid
            rejected by ``_set_uuid`` is replaced by a new ``uuid.uuid4()``.
        :param prefix: assigned to ``self.prefix``; falsy values become ""
        :param default_attributes: passed to ``_gen_default_attributes``;
            falsy values fall back to ``self.ATTR_NAMES``
        :param project: assigned to ``self.project`` (default "")
        :raises Sdata_Uuid_Exeption: re-raised from ``_set_uuid`` when the
            given uuid is rejected and ``auto_correct`` is disabled
        """

        # self._uuid = None
        # self._name = None
        self._prefix = None

        # ToDo: add getter and setter for metadata
        # self.metadata = kwargs.get("metadata") or Metadata()

        self.metadata = Metadata()

        # set default sdata attributes
        self.metadata.add(self.SDATA_VERSION,
                          __version__,
                          dtype="str",
                          description="sdata package version")
        self.metadata.add(self.SDATA_NAME,
                          "N.N.",
                          dtype="str",
                          description="name of the data object")
        # the uuid starts out empty; it is resolved at the end of __init__
        self.metadata.add(self.SDATA_UUID,
                          "",
                          dtype="str",
                          description="Universally Unique Identifier")
        self.metadata.add(self.SDATA_PARENT,
                          "",
                          dtype="str",
                          description="uuid of the parent sdata object")
        self.metadata.add(self.SDATA_CLASS,
                          self.__class__.__name__,
                          dtype="str",
                          description="sdata class")
        self.metadata.add(self.SDATA_CTIME,
                          now_utc_str(),
                          dtype="str",
                          description="creation date")
        self.metadata.add(self.SDATA_MTIME,
                          now_utc_str(),
                          dtype="str",
                          description="modification date")

        # add the attributes of a user supplied Metadata object on top of the
        # defaults; anything that is not a truthy Metadata instance is ignored
        metadata = kwargs.get("metadata")
        if metadata is not None:
            # logger.debug("Data got Metadata {}".format(metadata))
            if metadata and isinstance(metadata, Metadata):
                for attribute in metadata.attributes.values():
                    # logger.debug("Data.Metadata.add {0.name}:{0.value}".format(attribute))
                    self.metadata.add(attribute)

        # auto correct: enabled unless the caller passes something other
        # than None/True
        if kwargs.get("auto_correct") is None or kwargs.get(
                "auto_correct") is True:
            self.auto_correct = True
        else:
            self.auto_correct = False
        # logger.debug("sdata: set auto_correct={}".format(self.auto_correct))

        if kwargs.get("name") is not None:
            self.name = kwargs.get("name")

        self.prefix = kwargs.get("prefix") or ""
        self._gen_default_attributes(
            kwargs.get("default_attributes") or self.ATTR_NAMES)
        self._group = OrderedDict()
        # the private fields are initialised first, then the public
        # attributes are assigned
        self._table = None  # pd.DataFrame()
        self.table = kwargs.get("table", None)
        self._description = ""
        self.description = kwargs.get("description", "")
        self.project = kwargs.get("project", "")

        # uuid resolution, evaluated in this order:
        #   1. a uuid kwarg other than None/"hash" was given and the metadata
        #      holds no uuid value -> hand the kwarg to _set_uuid
        #   2. no uuid kwarg (None or "") and the metadata already holds a
        #      uuid -> keep the metadata uuid
        #   3. uuid kwarg is "hash" -> derive the uuid from the object state
        #   4. everything else -> generate a random uuid4
        # NOTE(review): the `== ""` test in branch 1 is already covered by
        # `is not None`, so uuid="" is passed to _set_uuid as well.
        # NOTE(review): an explicit uuid kwarg combined with a metadata
        # object that already carries a uuid matches none of 1-3 and ends up
        # in branch 4 with a fresh uuid4 - confirm this is intended.
        if (kwargs.get("uuid") == ""
                or kwargs.get("uuid") is not None) and not self.metadata.get(
                    self.SDATA_UUID).value and kwargs.get("uuid") != "hash":
            # logger.info("uuid in kwargs")
            try:
                self._set_uuid(kwargs.get(
                    "uuid"))  # store given uuid str or generate a new uuid
            except Sdata_Uuid_Exeption as exp:
                # the given uuid was rejected: replace it when auto
                # correction is on, otherwise propagate the error
                if self.auto_correct is True:
                    logger.warning("got invalid uuid -> generate a new uuid")
                    self._set_uuid(uuid.uuid4())
                else:
                    raise
        elif (kwargs.get("uuid") == "" or kwargs.get("uuid") is None
              ) and self.metadata.get(self.SDATA_UUID).value != "":
            # logger.info("uuid in metadata")
            pass
        elif kwargs.get("uuid") == "hash":
            sha3_256 = self.gen_uuid_from_state()
            # logger.info("gen uuid from sha3_256 {}".format(sha3_256))
            new_uuid = uuid_from_str(sha3_256)
            self._set_uuid(new_uuid.hex)
        else:
            # logger.info("uuid new")
            self._set_uuid(uuid.uuid4())
Пример #4
0
    def update_mtime(self):
        """Stamp the object with the current time as its modification time.

        The string returned by ``now_utc_str()`` is stored in the metadata
        under the key ``self.SDATA_MTIME``.

        :return: None
        """
        timestamp = now_utc_str()
        self.metadata.add(self.SDATA_MTIME, timestamp)
Пример #5
0
    def to_html(self, filepath, xlsx=True, style=None):
        """export Data to html

        :param filepath:
        :param xlsx:
        :param style:
        :return:
        """

        table_values = self.df.head()
        table_headers = self.df.columns

        table_description_values = self.df.describe()
        table_description_headers = self.df.describe().columns

        metadata_values = self.metadata.df.head().values
        metadata_headers = self.metadata.df.columns

        if xlsx is True:
            xlsx_tag = self.get_download_link()
        else:
            xlsx_tag = ""

        param = {
            "title":
            "{0} [{1}]".format(self.osname, self.uuid),
            "description":
            self.description,
            "metadata":
            tabulate(metadata_values, metadata_headers, tablefmt="html"),
            "table":
            tabulate(table_values, table_headers, tablefmt="html"),
            "table_description":
            tabulate(table_description_values,
                     table_description_headers,
                     tablefmt="html"),
            "xlsx_tag":
            xlsx_tag,
            "sdata":
            "created with sdata v{}.".format(__version__),
            "now":
            "{}".format(now_utc_str()),
        }

        tmpl = """<!DOCTYPE html>
<html lang="de">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{title}</title>
    <style>
    h1 {{ 
       background-color: #00FFFF77;
       color: black;
    }}
    h2 {{ 
       background-color: #00FFFF44;
       color: black;
    }}
    h3 {{ 
       background-color: #00FFFF11;
       color: black;
    }}
    p {{
    	color: black;
    }}
    table, th, td, caption {{
    border: 1px solid #a0a0a0;
    }}
    
    table {{
      border-collapse: collapse;
      border-spacing: 1em;
      border-width: thin 0 0 thin;
      margin: 0 0 1em;
      table-layout: auto;
      max-width: 100%;
      text-align: right;
    }}
    th, td {{
      font-weight: normal;
      text-align: left;
      border-spacing: 1em;
      padding: .1em .3em;
    }}
    th, caption {{
      background-color: #f1f3f4;
      font-weight: 700;
    }}
    </style>
  </head>
  <body>
    <h1>{title}</h1>
    <h2>Download</h2>
    <p">{xlsx_tag}</p>
    <h2>Description</h2>
    <p>{description}</p>
    <h2>Metadata</h2>
    {metadata}
    <h2>Table</h2>
    {table}
    <h3>Table Description</h3>
    {table_description}
    <p>{sdata}</p>
    <p>{now}</p>
  </body>
</html>""".format(**param)
        try:
            with open(filepath, "w") as fh:
                fh.write(tmpl)
        except Exception as exp:
            raise