Python Metadata примеры использования

Язык программирования: Python

Пространство имен/Пакет: sdata.metadata

Класс/Тип: Metadata

Примеров на hotexamples.com: 13

Python Metadata - 13 примеров найдено. Это лучшие примеры Python кода для sdata.metadata.Metadata, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Metadata(6)

from_dataframe(5)

add(2)

copy(1)

from_list(1)

get(1)

get_attr(1)

relabel(1)

set_attr(1)

to_csv(1)

to_csv_header(1)

to_dataframe(1)

to_dict(1)

to_json(1)

Пример #1

Показать файл

    def _load_metadata(path):
        """Load the metadata stored in a folder.

        :param path: folder that may contain a ``metadata.csv`` file
        :returns: result of ``Metadata().from_csv`` for that file, or an empty
            Metadata instance when the file does not exist
        """
        csv_file = os.path.join(path, "metadata.csv")
        if not os.path.exists(csv_file):
            return Metadata()
        return Metadata().from_csv(csv_file)

Пример #2

Показать файл

 def __init__(self, **kwargs):
     """Initialise the object on top of ``Data.__init__``.

     :param uuid: optional uuid; ``uuid.uuid4()`` is used when missing or falsy
     :param metadata: optional Metadata; ``Metadata()`` is used when missing
         or falsy
     """
     Data.__init__(self, **kwargs)
     self._uuid = None
     self._group = OrderedDict()
     given_uuid = kwargs.get("uuid")
     self.uuid = given_uuid if given_uuid else uuid.uuid4()
     given_metadata = kwargs.get("metadata")
     self.metadata = given_metadata if given_metadata else Metadata()
     self._table = pd.DataFrame()

Пример #3

Показать файл

    def from_csv(cls, s=None, filepath=None, sep=";"):
        """Import sdata.Data from csv.

        Lines starting with ``#`` followed by the separator carry the metadata
        attributes; ``#`` lines are skipped when the table is parsed.

        :param s: csv content as str (used when ``filepath`` is not given)
        :param filepath: path of a csv file; takes precedence over ``s``
        :param sep: separator (default=";")
        :return: sdata.Data
        :raises ValueError: if neither ``s`` nor ``filepath`` is given
        """

        def _attribute_rows(lines):
            # metadata header lines look like "#<sep>field<sep>field..."
            marker = "#" + sep
            return [
                line.rstrip("\n").split(sep)[1:8] for line in lines
                if line.startswith(marker)
            ]

        data = cls()
        if filepath:
            df = pd.read_csv(filepath, sep=sep, comment="#", index_col=0)
            # the context manager closes the handle after the header scan
            with open(filepath, "r") as csv_file:
                attribute_list = _attribute_rows(csv_file)
        elif s is not None:
            # NOTE(review): unlike the file branch no index_col is passed
            # here -- confirm whether both branches should agree
            df = pd.read_csv(StringIO(s), sep=sep, comment="#")
            attribute_list = _attribute_rows(StringIO(s))
        else:
            logger.error("data.from_csv: no csv data available")
            raise ValueError("data.from_csv: no csv data available")

        data.metadata = Metadata.from_list(attribute_list)
        data.table = df
        return data

Пример #4

Показать файл

 def get_all_metadata(self):
     """Collect the Metadata of every hdf key that contains "metadata".

     Each Metadata gets its ``name`` set to its ``!sdata_uuid`` value.

     :return: list of Metadata instances
     """
     matching_keys = list(
         filter(lambda key: "metadata" in key, self.hdf.keys()))
     collected = []
     for key in matching_keys:
         entry = Metadata.from_dataframe(self.hdf.get(key))
         entry.name = entry.get("!sdata_uuid").value
         collected.append(entry)
     return collected

Пример #5

Показать файл

    def metadata_from_hdf5(cls, filepath, **kwargs):
        """Import sdata.Data.Metadata from hdf5.

        :param filepath: path of the hdf5 file
        :param kwargs: accepted but not used
        :return: Metadata built from the ``/metadata`` node, or None if the
            file does not exist
        """
        # os.path.exists must be called; the bare function is always truthy
        if not os.path.exists(filepath):
            logger.error("hdf5 file '{}' not available".format(filepath))
            return None

        # NOTE(review): the store is opened "r+" although it is only read
        with pd.HDFStore(filepath, mode="r+") as hdf:
            return Metadata.from_dataframe(hdf.get("/metadata"))

Пример #6

Показать файл

    def get_data_by_uuid(self, uuid):
        """Build the Data object stored under the given uuid.

        :param uuid: uuid of the stored object; must be one of ``self.keys()``
        :return: Data built from the stored metadata, table and description,
            or None if the uuid is unknown
        """
        if uuid not in self.keys():
            logger.error("nod data with uuid '{}'".format(uuid))
            return None

        # nodes are read in the order metadata, table, description
        df_metadata, df_table, df_description = [
            self.hdf.get(node.format(uuid))
            for node in ("/{}/metadata", "/{}/table", "/{}/description")
        ]
        result = Data(metadata=Metadata.from_dataframe(df_metadata),
                      table=df_table)
        result.description_from_df(df_description)
        return result

Пример #7

Показать файл

    def from_hdf5(cls, filepath, **kwargs):
        """Import sdata.Data from hdf5.

        Reads the ``/metadata``, ``/table`` and ``/description`` nodes.

        :param filepath: path of the hdf5 file
        :param kwargs: accepted but not used
        :return: sdata.Data, or None if the file does not exist
        """
        # os.path.exists must be called; the bare function is always truthy
        if not os.path.exists(filepath):
            logger.error("hdf5 file '{}' not available".format(filepath))
            return None

        with pd.HDFStore(filepath, mode="r+") as hdf:
            df_metadata = hdf.get("/metadata")
            df_table = hdf.get("/table")
            df_description = hdf.get("/description")
            metadata = Metadata.from_dataframe(df_metadata)
            # NOTE(review): always builds a Data, not cls -- confirm intent
            data = Data(metadata=metadata, table=df_table)
            data.description_from_df(df_description)

        return data

Пример #8

Показать файл

    def load_blob(self, data_uuid, ignore_errors=True):
        """Get a blob from the vault.

        :param data_uuid: uuid passed to ``self._objectpath`` to locate the blob
        :param ignore_errors: when exactly ``False`` a KeyError is re-raised as
            Hdf5VaultException; otherwise it is only logged
        :return: Data built from the stored metadata, table and description,
            or None if a KeyError was logged and ignored
        """
        path = self._objectpath(data_uuid)
        logging.info("load blob {}".format(path))
        logger.debug(f'try to load {path}/metadata')

        with pd.HDFStore(self.rootpath, mode="r") as hdf:
            try:
                df_metadata = hdf.get(f'{path}/metadata')
                df_table = hdf.get(f'{path}/table')
                df_description = hdf.get(f'{path}/description')
                blob = Data(metadata=Metadata.from_dataframe(df_metadata),
                            table=df_table)
                blob.description_from_df(df_description)
            except KeyError as exp:
                logger.warning(exp)
                if ignore_errors is False:
                    raise Hdf5VaultException(exp)
            else:
                return blob
        return None

Пример #9

Показать файл

 def __init__(self, **kwargs):
     """Initialise uuid, name and metadata from keyword arguments.

     :param uuid: optional uuid; ``uuid.uuid4()`` is used when missing or falsy
     :param name: optional name; "N.N." is used when missing or falsy
     :param metadata: optional Metadata; ``Metadata()`` is used when missing
         or falsy
     """
     self._uuid = None
     self._name = None
     given_uuid = kwargs.get("uuid")
     self.uuid = given_uuid if given_uuid else uuid.uuid4()
     given_name = kwargs.get("name")
     self.name = given_name if given_name else "N.N."
     given_metadata = kwargs.get("metadata")
     self.metadata = given_metadata if given_metadata else Metadata()

Пример #10

Показать файл

 def __init__(self, **kwargs):
     """Initialise the object on top of ``Group.__init__``.

     :param uuid: optional uuid; ``uuid.uuid4()`` is used when missing or falsy
     :param metadata: optional Metadata; ``Metadata()`` is used when missing
         or falsy
     """
     Group.__init__(self, **kwargs)
     self._uuid = None
     self._group = OrderedDict()
     given_uuid = kwargs.get("uuid")
     self.uuid = given_uuid if given_uuid else uuid.uuid4()
     given_metadata = kwargs.get("metadata")
     self.metadata = given_metadata if given_metadata else Metadata()

Пример #11

Показать файл

 def initialize(self, **kwargs):
     """Open the store at ``self.rootpath`` and write a ``metadata`` node.

     The written metadata holds a ``ctime`` attribute set to
     ``now_utc_str()``.

     :param kwargs: passed through to ``pd.HDFStore``
     """
     logger.info(f"initialize {self.rootpath}")
     initial = Metadata()
     creation_time = now_utc_str()
     initial.add("ctime", creation_time)
     with pd.HDFStore(self.rootpath, **kwargs) as store:
         store.put('metadata', initial.df, format='fixed', data_columns=True)

Пример #12

Показать файл

    def __init__(self, **kwargs):
        """create Data object

        .. code-block:: python

            df = pd.DataFrame([1,2,3])
            data = sdata.Data(name='my name',
                        uuid='38b26864e7794f5182d38459bab85842',
                        table=df,
                        description="A remarkable description")

        The default ``!sdata_*`` attributes are added to a fresh Metadata
        first; the attributes of a passed ``metadata`` are added afterwards.

        :param name: name of the data object
        :param table: pandas.DataFrame to store
        :param uuid: uuid of the object; the value ``"hash"`` derives the uuid
            from ``gen_uuid_from_state()``
        :param metadata: sdata.Metadata object whose attributes are added
        :param description: a string to describe the object
        :param auto_correct: if missing, None or True, an invalid uuid str is
            replaced by a new random uuid instead of raising
        :param prefix: prefix of the object name (default "")
        :param default_attributes: definitions passed to
            ``_gen_default_attributes`` (default ``self.ATTR_NAMES``)
        :param project: project name (default "")
        """

        # self._uuid = None
        # self._name = None
        self._prefix = None

        # ToDo: add getter and setter for metadata
        # self.metadata = kwargs.get("metadata") or Metadata()

        self.metadata = Metadata()

        # set default sdata attributes
        self.metadata.add(self.SDATA_VERSION,
                          __version__,
                          dtype="str",
                          description="sdata package version")
        self.metadata.add(self.SDATA_NAME,
                          "N.N.",
                          dtype="str",
                          description="name of the data object")
        self.metadata.add(self.SDATA_UUID,
                          "",
                          dtype="str",
                          description="Universally Unique Identifier")
        self.metadata.add(self.SDATA_PARENT,
                          "",
                          dtype="str",
                          description="uuid of the parent sdata object")
        self.metadata.add(self.SDATA_CLASS,
                          self.__class__.__name__,
                          dtype="str",
                          description="sdata class")
        self.metadata.add(self.SDATA_CTIME,
                          now_utc_str(),
                          dtype="str",
                          description="creation date")
        self.metadata.add(self.SDATA_MTIME,
                          now_utc_str(),
                          dtype="str",
                          description="modification date")

        # add the attributes of a passed Metadata after the defaults
        metadata = kwargs.get("metadata")
        if metadata is not None:
            # logger.debug("Data got Metadata {}".format(metadata))
            if metadata and isinstance(metadata, Metadata):
                for attribute in metadata.attributes.values():
                    # logger.debug("Data.Metadata.add {0.name}:{0.value}".format(attribute))
                    self.metadata.add(attribute)

        # auto correct: enabled unless explicitly set to something else
        if kwargs.get("auto_correct") is None or kwargs.get(
                "auto_correct") is True:
            self.auto_correct = True
        else:
            self.auto_correct = False
        # logger.debug("sdata: set auto_correct={}".format(self.auto_correct))

        if kwargs.get("name") is not None:
            self.name = kwargs.get("name")

        self.prefix = kwargs.get("prefix") or ""
        self._gen_default_attributes(
            kwargs.get("default_attributes") or self.ATTR_NAMES)
        self._group = OrderedDict()
        self._table = None  # pd.DataFrame()
        self.table = kwargs.get("table", None)
        self._description = ""
        self.description = kwargs.get("description", "")
        self.project = kwargs.get("project", "")

        # uuid resolution, first matching branch wins:
        # 1. uuid given (not "hash") and the metadata has no uuid value yet
        if (kwargs.get("uuid") == ""
                or kwargs.get("uuid") is not None) and not self.metadata.get(
                    self.SDATA_UUID).value and kwargs.get("uuid") != "hash":
            # logger.info("uuid in kwargs")
            try:
                self._set_uuid(kwargs.get(
                    "uuid"))  # store given uuid str or generate a new uuid
            except Sdata_Uuid_Exeption as exp:
                if self.auto_correct is True:
                    logger.warning("got invalid uuid -> generate a new uuid")
                    self._set_uuid(uuid.uuid4())
                else:
                    raise
        # 2. no uuid given, but the metadata already carries one: keep it
        elif (kwargs.get("uuid") == "" or kwargs.get("uuid") is None
              ) and self.metadata.get(self.SDATA_UUID).value != "":
            # logger.info("uuid in metadata")
            pass
        # 3. uuid derived from the object state
        elif kwargs.get("uuid") == "hash":
            sha3_256 = self.gen_uuid_from_state()
            # logger.info("gen uuid from sha3_256 {}".format(sha3_256))
            new_uuid = uuid_from_str(sha3_256)
            self._set_uuid(new_uuid.hex)
        # 4. every other combination gets a new random uuid
        else:
            # logger.info("uuid new")
            self._set_uuid(uuid.uuid4())

Пример #13

Показать файл

class Data(object):
    """Base sdata object"""
    ATTR_NAMES = []

    SDATA_VERSION = "!sdata_version"
    SDATA_NAME = "!sdata_name"
    SDATA_UUID = "!sdata_uuid"
    SDATA_CTIME = "!sdata_ctime"
    SDATA_MTIME = "!sdata_mtime"
    SDATA_PARENT = "!sdata_parent"
    SDATA_CLASS = "!sdata_class"
    SDATA_PROJECT = "!sdata_project"

    SDATA_ATTRIBUTES = [
        SDATA_VERSION, SDATA_NAME, SDATA_UUID, SDATA_CLASS, SDATA_PARENT,
        SDATA_PROJECT, SDATA_CTIME, SDATA_MTIME
    ]

    def __init__(self, **kwargs):
        """create Data object

        .. code-block:: python

            df = pd.DataFrame([1,2,3])
            data = sdata.Data(name='my name',
                        uuid='38b26864e7794f5182d38459bab85842',
                        table=df,
                        description="A remarkable description")

        The default ``!sdata_*`` attributes are added to a fresh Metadata
        first; the attributes of a passed ``metadata`` are added afterwards.

        :param name: name of the data object
        :param table: pandas.DataFrame to store
        :param uuid: uuid of the object; the value ``"hash"`` derives the uuid
            from ``gen_uuid_from_state()``
        :param metadata: sdata.Metadata object whose attributes are added
        :param description: a string to describe the object
        :param auto_correct: if missing, None or True, an invalid uuid str is
            replaced by a new random uuid instead of raising
        :param prefix: prefix of the object name (default "")
        :param default_attributes: definitions passed to
            ``_gen_default_attributes`` (default ``self.ATTR_NAMES``)
        :param project: project name (default "")
        """

        # self._uuid = None
        # self._name = None
        self._prefix = None

        # ToDo: add getter and setter for metadata
        # self.metadata = kwargs.get("metadata") or Metadata()

        self.metadata = Metadata()

        # set default sdata attributes
        self.metadata.add(self.SDATA_VERSION,
                          __version__,
                          dtype="str",
                          description="sdata package version")
        self.metadata.add(self.SDATA_NAME,
                          "N.N.",
                          dtype="str",
                          description="name of the data object")
        self.metadata.add(self.SDATA_UUID,
                          "",
                          dtype="str",
                          description="Universally Unique Identifier")
        self.metadata.add(self.SDATA_PARENT,
                          "",
                          dtype="str",
                          description="uuid of the parent sdata object")
        self.metadata.add(self.SDATA_CLASS,
                          self.__class__.__name__,
                          dtype="str",
                          description="sdata class")
        self.metadata.add(self.SDATA_CTIME,
                          now_utc_str(),
                          dtype="str",
                          description="creation date")
        self.metadata.add(self.SDATA_MTIME,
                          now_utc_str(),
                          dtype="str",
                          description="modification date")

        # add the attributes of a passed Metadata after the defaults
        metadata = kwargs.get("metadata")
        if metadata is not None:
            # logger.debug("Data got Metadata {}".format(metadata))
            if metadata and isinstance(metadata, Metadata):
                for attribute in metadata.attributes.values():
                    # logger.debug("Data.Metadata.add {0.name}:{0.value}".format(attribute))
                    self.metadata.add(attribute)

        # auto correct: enabled unless explicitly set to something else
        if kwargs.get("auto_correct") is None or kwargs.get(
                "auto_correct") is True:
            self.auto_correct = True
        else:
            self.auto_correct = False
        # logger.debug("sdata: set auto_correct={}".format(self.auto_correct))

        if kwargs.get("name") is not None:
            self.name = kwargs.get("name")

        self.prefix = kwargs.get("prefix") or ""
        self._gen_default_attributes(
            kwargs.get("default_attributes") or self.ATTR_NAMES)
        self._group = OrderedDict()
        self._table = None  # pd.DataFrame()
        self.table = kwargs.get("table", None)
        self._description = ""
        self.description = kwargs.get("description", "")
        self.project = kwargs.get("project", "")

        # uuid resolution, first matching branch wins:
        # 1. uuid given (not "hash") and the metadata has no uuid value yet
        if (kwargs.get("uuid") == ""
                or kwargs.get("uuid") is not None) and not self.metadata.get(
                    self.SDATA_UUID).value and kwargs.get("uuid") != "hash":
            # logger.info("uuid in kwargs")
            try:
                self._set_uuid(kwargs.get(
                    "uuid"))  # store given uuid str or generate a new uuid
            except Sdata_Uuid_Exeption as exp:
                if self.auto_correct is True:
                    logger.warning("got invalid uuid -> generate a new uuid")
                    self._set_uuid(uuid.uuid4())
                else:
                    raise
        # 2. no uuid given, but the metadata already carries one: keep it
        elif (kwargs.get("uuid") == "" or kwargs.get("uuid") is None
              ) and self.metadata.get(self.SDATA_UUID).value != "":
            # logger.info("uuid in metadata")
            pass
        # 3. uuid derived from the object state
        elif kwargs.get("uuid") == "hash":
            sha3_256 = self.gen_uuid_from_state()
            # logger.info("gen uuid from sha3_256 {}".format(sha3_256))
            new_uuid = uuid_from_str(sha3_256)
            self._set_uuid(new_uuid.hex)
        # 4. every other combination gets a new random uuid
        else:
            # logger.info("uuid new")
            self._set_uuid(uuid.uuid4())

    def gen_uuid_from_state(self):
        """generate the same uuid for the same data

        :return: uuid
        """
        s = hashlib.sha3_256()
        metadata = self.metadata.copy()
        metadata.attributes.pop(self.SDATA_UUID)
        metadata.attributes.pop(self.SDATA_MTIME)
        metadata.attributes.pop(self.SDATA_CTIME)
        metadatastr = metadata.to_json().encode(errors="replace")
        s.update(metadatastr)
        if self.table is not None:
            tablestr = self.table.to_json().encode(errors="replace")
            s.update(tablestr)
        s.update(self.description.encode(errors="replace"))
        return s.hexdigest()

    def __eq__(self, other):
        """compare Data checksum
        
        :param other: sdata.Data objecet
        :return: True or False
        """
        if not isinstance(other, self.__class__):
            logger.debug("you should not compare {} with {}!".format(
                self.__class__.__name__, other.__class__.__name__))
            return False
        return self.sha3_256 == other.sha3_256

    def update_mtime(self):
        """Set the modification time attribute to ``now_utc_str()``.

        :return: None
        """
        metadata = self.metadata
        attr_name = self.SDATA_MTIME
        metadata.add(attr_name, now_utc_str())

    @property
    def sha3_256_table(self):
        """Return a SHA3 hash of the sData.table object with a hashbit length of 32 bytes.

        .. code-block:: python

            sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256_table

            'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'

        :return: hashlib.sha3_256.hexdigest()
        """
        s = hashlib.sha3_256()
        if self.table is not None:
            tablestr = self.table.to_json().encode(errors="replace")
            s.update(tablestr)
        return s.hexdigest()

    @property
    def sha3_256(self):
        """Return a SHA3 hash of the sData object with a hashbit length of 32 bytes.

        .. code-block:: python

            sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256

            'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'

        :return: hashlib.sha3_256.hexdigest()
        """
        s = hashlib.sha3_256()
        metadatastr = self.metadata.to_json().encode(errors="replace")
        s.update(metadatastr)
        if self.table is not None:
            tablestr = self.table.to_json().encode(errors="replace")
            s.update(tablestr)
        s.update(self.description.encode(errors="replace"))
        return s.hexdigest()

    def update_hash(self, hashobject):
        """A hash represents the object used to calculate a checksum of a
        string of information.

        .. code-block:: python

            data = sdata.Data()

            md5 = hashlib.md5()
            data.update_hash(md5)
            md5.hexdigest()
            'bbf323bdcb0bf961803b5504a8a60d69'

            sha1 = hashlib.sha1()
            data.update_hash(sha1)
            sha1.hexdigest()
            '3c59368c7735c1ecaf03ebd4c595bb6e73e90f0c'

            hashobject = hashlib.sha3_256()
            data.update_hash(hashobject).hexdigest()
            'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'

            data.update_hash(hashobject).digest()
            b'M8...'

        :param hash: hash object, e.g. hashlib.sha1()
        :return: hash
        """
        if not (hasattr(hashobject, "update")
                and hasattr(hashobject, "hexdigest")):
            logger.error("Data.update_hash: given hashfunction is invalid")
            raise Exception("Data.update_hash: given hashfunction is invalid")

        metadatastr = self.metadata.to_json().encode(errors="replace")
        hashobject.update(metadatastr)
        if self.table is not None:
            tablestr = self.table.to_json().encode(errors="replace")
            hashobject.update(tablestr)
        hashobject.update(self.description.encode(errors="replace"))
        return hashobject

    def describe(self):
        """Generate descriptive info of the data

        .. code-block:: python

            df = pd.DataFrame([1,2,3])
            data = sdata.Data(name='my name',
                        uuid='38b26864e7794f5182d38459bab85842',
                        table=df,
                        description="A remarkable description")
            data.describe()

        .. code-block:: none

                            0
            metadata        3
            table_rows      3
            table_columns   1
            description    24


        :return: pd.DataFrame
        """
        df = pd.DataFrame({0: []}, dtype=object)
        df.loc["metadata", 0] = self.metadata.size
        if self.table is None:
            df.loc["table_rows"] = 0
            df.loc["table_columns"] = 0
        else:
            df.loc["table_rows"] = len(self.table)
            df.loc["table_columns"] = len(self.table.columns)
        df.loc["description", 0] = len(self.description)
        return df

    def _gen_default_attributes(self, default_attributes):
        """create default Attributes in data.metadata"""
        for attr_name, value, dtype, unit, description, required in default_attributes:
            self.metadata.set_attr(name=attr_name,
                                   value=value,
                                   dtype=dtype,
                                   description=description)

    def _get_uuid(self):
        return self.metadata.get(self.SDATA_UUID).value
        # return self._uuid

    def _set_uuid(self, value):
        if isinstance(value, str):
            try:
                uuid.UUID(value)
                self.metadata.set_attr(self.SDATA_UUID, uuid.UUID(value).hex)
            except ValueError as exp:
                logger.warning("data.uuid: %s" % exp)
                raise Sdata_Uuid_Exeption("got invalid uuid str '{}'".format(
                    str(value)))
        elif isinstance(value, uuid.UUID):
            self.metadata.set_attr(self.SDATA_UUID, value.hex)
        else:
            logger.error("Data.uuid: invalid uuid '{}'".format(value))
            raise Exception("Data.uuid: invalid uuid '{}'".format(value))

    uuid = property(fget=_get_uuid, fset=_set_uuid, doc="uuid of the object")

    def _get_name(self):
        # return self._name
        return self.metadata.get(self.SDATA_NAME).value

    def _set_name(self, value):
        if isinstance(value, str):
            try:
                self.metadata.set_attr(self.SDATA_NAME, str(value)[:256])
            except ValueError as exp:
                logger.warning("data.name: %s" % exp)
        else:
            # self._name = str(value)[:256]
            self.metadata.set_attr(self.SDATA_NAME, str(value)[:256])

    name = property(fget=_get_name, fset=_set_name, doc="name of the object")

    def _get_project(self):
        return self.metadata.get(self.SDATA_PROJECT).value

    def _set_project(self, value):
        if isinstance(value, str):
            try:
                self.metadata.set_attr(self.SDATA_PROJECT, str(value)[:256])
            except ValueError as exp:
                logger.warning("data.project: %s" % exp)
        else:
            # self._name = str(value)[:256]
            self.metadata.set_attr(self.SDATA_PROJECT, str(value)[:256])

    project = property(fget=_get_project,
                       fset=_set_project,
                       doc="name of the project")

    def _get_description(self):
        """Return the stored description string."""
        return self._description

    def _set_description(self, value):
        if isinstance(value, str):
            try:
                self._description = str(value)
            except ValueError as exp:
                logger.warning("data.name: %s" % exp)
        else:
            self._description = str(value)

    description = property(fget=_get_description,
                           fset=_set_description,
                           doc="description of the object")

    @property
    def filename(self):

        validchars = "-_.() "
        out = ""

        name = "{}".format(self.name)

        for c in name:
            if str.isalpha(c) or str.isdigit(c) or (c in validchars):
                out += c
            else:
                out += "_"
        return out

    def _get_prefix(self):
        """Return the stored name prefix."""
        return self._prefix

    def _set_prefix(self, value):
        if isinstance(value, str):
            try:
                self._prefix = value[:256]
            except ValueError as exp:
                logger.warning("data.prefix: %s" % exp)
        else:
            self._prefix = str(value)[:256]

    prefix = property(fget=_get_prefix,
                      fset=_set_prefix,
                      doc="prefix of the object name")

    def _get_table(self):
        """Return the stored table (None when no table is set)."""
        return self._table

    def _set_table(self, df):
        if isinstance(df, pd.DataFrame):
            self._table = df
            if self._table.index.name is None:
                self._table.index.name = "index"

    table = property(fget=_get_table,
                     fset=_set_table,
                     doc="table object(pandas.DataFrame)")
    df = table

    def description_to_df(self):
        """get description as DataFrame

        :return: DataFrame of description lines
        """
        return pd.DataFrame(self.description.splitlines())

    def description_from_df(self, df):
        """set description from DataFrame of lines

        :return:
        """
        if df is not None and isinstance(df, pd.DataFrame) and len(df) > 0:
            lines = df.iloc[:, 0]
            lines = lines.astype(str)
            self.description = "\n".join(lines.values)

    def to_folder(self, path, dtype="csv"):
        """export data to folder

        The metadata attributes ``class``, ``uuid`` and ``name`` are set
        before the export. Child objects of ``self.group`` are exported
        recursively into subfolders named ``<classname>-<osname>``.

        :param path: target folder; created if missing, otherwise cleaned
            with ``clear_folder`` first
        :param dtype: "csv" or "xlsx"; any other value is treated as "xlsx"
        :return: path
        """

        # unknown formats fall back to xlsx
        if dtype not in ["csv", "xlsx"]:
            dtype = "xlsx"
        if not os.path.exists(path):
            try:
                os.makedirs(path)
            except OSError as exp:
                # the error is only logged; the export continues
                logger.error(exp)
        else:
            self.clear_folder(path)

        self.metadata.set_attr(name="class",
                               value=self.__class__.__name__,
                               description="object class",
                               unit="-",
                               dtype="str")
        self.metadata.set_attr(name="uuid",
                               value=self.uuid,
                               description="object uuid",
                               unit="-",
                               dtype="str")
        self.metadata.set_attr(name="name",
                               value=self.name,
                               description="object name",
                               unit="-",
                               dtype="str")

        if dtype == "csv":
            metadata_filepath = os.path.join(path, "metadata.csv")
            logger.debug("export meta csv '{}'".format(metadata_filepath))
            self.metadata.to_csv(metadata_filepath)

            # table export (only a non-empty DataFrame, written without index)
            if isinstance(self._table, pd.DataFrame) and len(self._table) > 0:
                exportpath = os.path.join(path, "{}.csv".format(self.osname))
                self._table.to_csv(exportpath, index=False)
        if dtype == "xlsx":
            # a missing table is replaced by an empty DataFrame before export
            if not isinstance(self._table, pd.DataFrame):
                self.table = pd.DataFrame()
            exportpath = os.path.join(path, "{}.xlsx".format(self.osname))
            self.to_xlsx(exportpath)
        # group export
        for data in self.group.values():
            exportpath = os.path.join(
                path, "{}-{}".format(data.__class__.__name__.lower(),
                                     data.osname))
            data.to_folder(exportpath, dtype=dtype)
        return path

    @classmethod
    def from_folder(cls, path):
        """Create an sdata object instance from a folder.

        Reads the metadata via ``_load_metadata``, takes uuid and name from
        the ``uuid`` and ``name`` attributes and adds every subfolder,
        loaded recursively, as child object.

        :param path: folder to read (presumably one written by ``to_folder``)
        :return: instance of ``cls``; a default instance if ``path`` does not
            exist
        """
        data = cls()
        if not os.path.exists(path):
            logger.error(
                "from_folder error: path '{}' not exists.".format(path))
            return data

        data.metadata = cls._load_metadata(path)
        try:
            data.uuid = data.metadata.get_attr("uuid").value
            data.name = data.metadata.get_attr("name").value
        except Exception:
            # log the metadata that lacks a usable uuid/name, then re-raise
            logger.error("Data.from_folder: {}".format(
                data.metadata.to_dict()))
            raise

        entries = os.listdir(path)

        # table import
        files = [
            x for x in entries
            if not os.path.isdir(os.path.join(path, x))
            and not x.startswith("metadata")
        ]
        if len(files) == 1:
            importpath = os.path.join(path, files[0])
            logger.debug("read table {}".format(importpath))
            # NOTE(review): the table file is only reported, not read
            # data._table = pd.read_csv(importpath)

        for folder in entries:
            subfolder = os.path.join(path, folder)
            if os.path.isdir(subfolder):
                # each child folder is loaded exactly once
                data.add_data(cls.from_folder(subfolder))
        return data

    @staticmethod
    def clear_folder(path):
        """delete subfolder in export folder

        :param path: path
        :return: None
        """
        def is_valid(path):
            prefix = path.split("-")[0]
            if prefix in [x.lower() for x in SDATACLS.keys()]:
                return True
            else:
                return False

        subfolders = [
            x for x in os.listdir(path) if os.path.isdir(os.path.join(path, x))
        ]
        valid_subfolders = [x for x in subfolders if is_valid(x)]
        for subfolder in valid_subfolders:
            try:
                subfolder = os.path.join(path, subfolder)
                logger.debug("clear_folder: rm {}".format(subfolder))
                shutil.rmtree(subfolder)
            except OSError as exp:
                raise

    @staticmethod
    def _load_metadata(path):
        """Load the metadata stored in a folder.

        :param path: folder that may contain a ``metadata.csv`` file
        :returns: result of ``Metadata().from_csv`` for that file, or an empty
            Metadata instance when the file does not exist
        """
        csv_file = os.path.join(path, "metadata.csv")
        if not os.path.exists(csv_file):
            return Metadata()
        return Metadata().from_csv(csv_file)

    @staticmethod
    def _get_class_from_metadata(metadata):
        """Look up the sdata class named by the ``class`` attribute.

        :param metadata: Metadata providing ``get_attr("class")``
        :returns: the matching ``SDATACLS`` entry; ``Data`` for an unknown
            class name; None when no ``class`` attribute exists
        """
        classattr = metadata.get_attr("class")
        if classattr is None:
            logger.warning("cls not defined '{}'".format(metadata))
            return None

        sdataclassname = classattr.value
        if sdataclassname in SDATACLS:
            return SDATACLS.get(sdataclassname)

        logger.warning("unsupported cls '{}'".format(sdataclassname))
        return Data

    @property
    def osname(self):
        """:returns: lower-cased ``asciiname``, intended as an os compatible name"""
        ascii_name = self.asciiname
        return ascii_name.lower()

    @property
    def asciiname(self):
        """:returns: ``name`` reduced to ascii characters

        German umlauts and sharp s are transliterated, blanks and path
        separators become ``_`` and every remaining non-ascii character is
        replaced by ``?``.
        """
        replacements = (
            ("ä", "ae"),
            ("ö", "oe"),
            ("ü", "ue"),
            ("Ä", "Ae"),
            ("Ö", "Oe"),
            ("Ü", "Ue"),
            ("ß", "sz"),
            (" ", "_"),
            ("/", "_"),
            ("\\", "_"),
        )
        result = copy.copy(self.name)
        for special, ascii_form in replacements:
            result = result.replace(special, ascii_form)
        ascii_bytes = result.encode('ascii', 'replace')
        return ascii_bytes.decode("ascii")

    def verify_attributes(self):
        """Check that all mandatory attributes are present and set.

        Each entry of ``self.ATTR_NAMES`` is read as
        ``[name, value, dtype, unit, description, required]``; entries whose
        ``required`` field is ``False`` are not checked.

        :return: list of names of required attributes that are missing from
            the metadata or whose value is ``None``
        """
        missing = []
        for definition in self.ATTR_NAMES:
            attr_name, is_required = definition[0], definition[5]
            if is_required is False:
                continue
            found = self.metadata.get_attr(attr_name)
            if found is None:
                missing.append(attr_name)
            elif found.value is None:
                missing.append(found.name)
        return missing

    def __str__(self):
        """:return: short text of the form ``(<class name> '<name>':<uuid>)``"""
        return "({} '{}':{})".format(self.__class__.__name__, self.name,
                                     self.uuid)

    # the same text serves as the debug representation
    __repr__ = __str__

    def get_group(self):
        """:return: the mapping of child objects stored in ``self._group``"""
        children = self._group
        return children

    # read-only access to the child mapping
    group = property(fget=get_group, doc="get group")

    def keys(self):
        """Return the uuids of all child objects.

        :return: list of uuids, in group order
        """
        return [child_uuid for child_uuid in self.group.keys()]

    def values(self):
        """Return all child objects.

        :return: list of child objects, in group order
        """
        return [child for child in self.group.values()]

    def items(self):
        """Return all child objects together with their uuids.

        :return: [(child uuid, child object), ], in group order
        """
        return [(child_uuid, child) for child_uuid, child in self.group.items()]

    def clear_group(self):
        """Drop all child objects by replacing the group with a new, empty one.

        :return: None
        """
        empty_group = OrderedDict()
        self._group = empty_group

    def add_data(self, data):
        """Add a child object to the group, keyed by its uuid.

        Objects without a ``metadata`` attribute are ignored with a warning.
        An object whose name (compared case-insensitively) is already used by
        a child is rejected with an error log entry.

        :param data: object to add
        :return: None
        """
        if not hasattr(data, "metadata"):
            logger.warning(
                f"ignore data {data}, {data.__class__.__name__} (wrong type!)")
            return

        taken_names = [child.name.lower() for child in self.group.values()]
        if data.name.lower() in taken_names:
            logger.error("{}: name '{}' aready exists".format(
                data.__class__.__name__, data.name))
            return

        self.group[data.uuid] = data

    def get_data_by_uuid(self, uid):
        """Look up a child object by its uuid.

        :param uid: uuid of the child object
        :return: the child object, or None if the uuid is not in the group
        """
        children = self.group
        return children.get(uid)

    def get_data_by_name(self, name):
        """Look up a child object by its name.

        :param name: name of the child object (exact match)
        :return: the child object found via ``get_data_by_uuid``; if several
            children share the name, the last one in group order is used
        """
        uuid_by_name = {
            child.name: child_uuid
            for child_uuid, child in self.group.items()
        }
        return self.get_data_by_uuid(uuid_by_name.get(name))

    def tree_folder(self,
                    dir,
                    padding="  ",
                    print_files=True,
                    hidden_files=False,
                    last=True):
        """Print the folder structure below ``dir`` as a tree.

        Within every folder the entries whose name starts with ``metadata``
        are listed first, followed by the remaining entries in sorted order.

        :param dir: folder to print
        :param padding: indentation prefix of the current level
        :param print_files: if True, list files and folders (entries starting
            with ``.`` are left out); if False, list folders only
        :param hidden_files: accepted for interface compatibility; the value
            is passed on to sub folders but not evaluated
        :param last: True if ``dir`` is the last entry of its parent folder
        :return: None
        """
        connector = '└─' if last else '├─'
        print(padding[:-1] + connector + os.path.basename(os.path.abspath(dir)))
        padding = padding + ' '

        if print_files:
            entries = [
                x for x in sorted(os.listdir(dir)) if not x.startswith(".")
            ]
        else:
            entries = [
                x for x in sorted(os.listdir(dir))
                if os.path.isdir(os.path.join(dir, x))
            ]

        # metadata first; both parts are already sorted and must not be
        # sorted again as a whole, otherwise the metadata entries lose
        # their leading position
        metafiles = [f for f in entries if f.startswith("metadata")]
        others = [x for x in entries if x not in metafiles]
        entries = metafiles + others

        last_index = len(entries) - 1
        for count, entry in enumerate(entries):
            path = os.path.join(dir, entry)
            is_last = count == last_index
            if os.path.isdir(path):
                self.tree_folder(path,
                                 padding + (' ' if is_last else '|'),
                                 print_files,
                                 hidden_files=hidden_files,
                                 last=is_last)
            else:
                print(padding + ('└─' if is_last else '├─') + entry)

    def dir(self):
        """Return the names of all child objects as a nested list.

        :return: list of ``(child name, child.dir())`` tuples, in group order
        """
        listing = []
        for child in self.group.values():
            listing.append((child.name, child.dir()))
        return listing

    def to_xlsx_byteio(self):
        """Render the object as an xlsx workbook held in memory.

        The workbook has three sheets: ``metadata`` (``self.metadata.df``),
        ``table`` (``self.df``) and ``description`` (one row per line of
        ``self.description``).

        :return: bytes, the content of the xlsx file
        """
        def adjust_col_width(sheetname, df, writer, width=40):
            # one extra column in front of the dataframe columns for the index
            worksheet = writer.sheets[sheetname]
            for col_idx in range(len(df.columns) + 1):
                worksheet.set_column(col_idx, col_idx, width)

        output = BytesIO()
        # The context manager finalises the workbook on exit;
        # ExcelWriter.save() is not available in pandas >= 2.0.
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            self.metadata.df.to_excel(writer, sheet_name='metadata')
            adjust_col_width('metadata', self.metadata.df, writer)

            self.df.to_excel(writer, sheet_name='table')
            adjust_col_width('table', self.table, writer, width=15)

            df_description = pd.DataFrame(self.description.splitlines())
            df_description.to_excel(writer,
                                    sheet_name='description',
                                    index=False,
                                    header=False)
            adjust_col_width('description', df_description, writer, width=200)

        return output.getvalue()

    def to_xlsx_base64(self):
        """Return the xlsx export base64 encoded.

        :return: bytes, base64 encoded result of ``to_xlsx_byteio``
        """
        return base64.b64encode(self.to_xlsx_byteio())

    def get_download_link(self):
        """Build an html anchor that offers the xlsx export as a download.

        The workbook is embedded in the link as a base64 ``data:`` url and
        the download is named ``<osname>.xlsx``.

        :return: str, html ``<a>`` element
        """
        template = '<a href="data:application/octet-stream;base64,{1}" download="{0}.xlsx">Download {0}.xlsx file</a>'
        encoded = self.to_xlsx_base64()
        file_stem = self.osname
        return template.format(file_stem, encoded.decode())

    def to_xlsx(self, filepath=None):
        """Export metadata, table and description to an xlsx file.

        The workbook gets the sheets ``metadata``, ``table`` and
        ``description`` (one row per line of ``self.description``). If the
        table index has no name, it is named ``index`` before the export.

        :param filepath: path of the xlsx file to write
        :return: None
        """
        def adjust_col_width(sheetname, df, writer, width=40):
            # one extra column in front of the dataframe columns for the index
            worksheet = writer.sheets[sheetname]
            for col_idx in range(len(df.columns) + 1):
                worksheet.set_column(col_idx, col_idx, width)

        # ``set_column`` is an xlsxwriter worksheet method, so the engine is
        # pinned instead of relying on pandas' default engine selection.
        with pd.ExcelWriter(filepath, engine='xlsxwriter') as writer:

            # metadata
            dfm = self.metadata.to_dataframe()
            dfm.index.name = "key"
            dfm.to_excel(writer, sheet_name='metadata', index=False)
            adjust_col_width('metadata', dfm, writer)

            # table; an empty sheet is written if there is no table
            if self.table is not None:
                if self._table.index.name is None:
                    self._table.index.name = "index"
                self.table.to_excel(writer, sheet_name='table')
                adjust_col_width('table', self.table, writer, width=15)
            else:
                df = pd.DataFrame()
                df.index.name = "index"
                df.to_excel(writer, sheet_name='table')
                adjust_col_width('table', df, writer, width=15)

            # description
            df_description = pd.DataFrame(self.description.splitlines())
            df_description.to_excel(writer,
                                    sheet_name='description',
                                    index=False,
                                    header=False)
            adjust_col_width('description', df_description, writer, width=200)

    @classmethod
    def from_xlsx(cls, filepath):
        """Create an object from an xlsx file.

        The sheet ``metadata`` is required; the sheets ``table`` and
        ``description`` are read if present. The new object is named after
        ``filepath`` before the metadata of the file is applied.

        :param filepath: path of the xlsx file
        :return: new instance of ``cls``
        :raises FileNotFoundError: if ``filepath`` does not exist
        """
        if not os.path.exists(filepath):
            raise FileNotFoundError(
                "excel file '{}' not available".format(filepath))

        wb = openpyxl.load_workbook(filename=filepath)
        sheetnames = wb.sheetnames

        tt = cls(name=filepath)

        # table
        if "table" in sheetnames:
            tt.table = pd.read_excel(filepath,
                                     sheet_name="table",
                                     index_col=0)
        else:
            logger.info("no table data in '{}'".format(filepath))

        # metadata, indexed by attribute name; empty text cells are read as
        # NaN and turned back into empty strings
        dfm = pd.read_excel(filepath, sheet_name="metadata")
        dfm = dfm.set_index(dfm["name"].values)
        dfm["description"] = dfm["description"].replace(np.nan, '')
        dfm["label"] = dfm["label"].replace(np.nan, '')
        tt.metadata = tt.metadata.from_dataframe(dfm)

        # description: one line per cell of column A
        if "description" in sheetnames:
            # str() keeps the join working for cells that hold numbers
            cells = [
                "" if cell.value is None else str(cell.value)
                for cell in wb["description"]["A"]
            ]
            tt.description = "\n".join(cells)
        else:
            logger.info("no description in '{}'".format(filepath))

        return tt

    def to_json(self, filepath=None):
        """Serialise metadata, table and description as json.

        :param filepath: file to write; if empty or None the json text is
            returned instead (default: None)
        :return: json str if no ``filepath`` is given, otherwise None
        """
        table_dict = {} if self.table is None else self.table.to_dict()
        payload = {
            "metadata": self.metadata.to_dict(),
            "table": table_dict,
            "description": self.description,
        }
        if not filepath:
            return json.dumps(payload)

        with open(filepath, "w") as fh:
            json.dump(payload, fh)

    @classmethod
    def from_json(cls, s=None, filepath=None):
        """Create an object from a json str or a json file.

        Exactly one of ``s`` and ``filepath`` is expected. If both are given,
        an error is logged and an object without imported content is
        returned.

        :param s: json str
        :param filepath: path of a json file
        :return: new instance of ``cls``, or None if neither ``s`` nor
            ``filepath`` is given
        """
        data = cls(name="N.N.")
        if s is None and filepath is None:
            logger.error("data.from_json: no json data available")
            return None

        if s is None:
            with open(filepath, "r") as fh:
                d = json.load(fh)
        elif filepath is None:
            d = json.loads(s)
        else:
            logger.error("data.from_json: unexpected error")
            d = None

        if d:
            if "metadata" in d:
                data.metadata.update_from_dict(d["metadata"])
            else:
                logger.error("Data.from_json: metadata not available")

            if "table" in d:
                data.table = pd.DataFrame.from_dict(d["table"])
            else:
                logger.error("Data.from_json: table not available")

            if "description" in d:
                data.description = d["description"]
            else:
                logger.error("Data.from_json: description not available")

        return data

    @classmethod
    def from_url(cls, url=None, stype=None, timeout=None):
        """Create an object from the content of a url.

        :param url: url to download
        :param stype: format of the downloaded content; only "json" is
            supported
        :param timeout: timeout in seconds handed to ``requests.get``
            (default: None, wait without limit)
        :return: result of ``cls.from_json`` for the downloaded text
        :raises NotADirectoryError: if ``stype`` is not supported (exception
            type kept for backward compatibility)
        """
        supported_stypes = ["json"]

        # validate before any network access takes place
        if stype not in supported_stypes:
            raise NotADirectoryError(
                "stype '{}' is not supported".format(stype))

        raw = requests.get(url, timeout=timeout).text
        return cls.from_json(raw)

    def to_csv(self, filepath=None):
        """Export metadata and table as csv text.

        The text starts with the metadata header (lines prefixed with ``#;``)
        followed by the table, both using ``;`` as separator.

        :param filepath: file to write; if None the csv text is returned
        :return: csv str if ``filepath`` is None, otherwise None
        """
        parts = [
            self.metadata.to_csv_header(prefix="#;", sep=";", filepath=None)
        ]
        if self.df is not None:
            parts.append(self.df.to_csv(sep=";"))
        content = "".join(parts)

        if filepath is not None:
            with open(filepath, "w") as fh:
                fh.write(content)
            return None
        return content

    @classmethod
    def from_csv(cls, s=None, filepath=None, sep=";"):
        """Import an object from csv text or a csv file.

        Lines starting with ``#;`` carry the metadata: each is split at
        ``sep`` and fields 1 to 7 are handed to ``Metadata.from_list``. All
        other lines are read as the table, with the first column as index.

        :param s: csv str
        :param filepath: path of a csv file; takes precedence over ``s``
        :param sep: separator (default=";")
        :return: new instance of ``cls``
        :raises ValueError: if neither ``s`` nor ``filepath`` is given
        """
        data = cls()
        if filepath:
            df = pd.read_csv(filepath, sep=sep, comment="#", index_col=0)
            # read the header lines and close the file again
            with open(filepath, "r") as fh:
                lines = fh.readlines()
        elif s is not None:
            df = pd.read_csv(StringIO(s), sep=sep, comment="#", index_col=0)
            lines = list(StringIO(s))
        else:
            logger.error("data.from_csv: no csv data available")
            raise ValueError("data.from_csv: no csv data available")

        attribute_list = [
            line.rstrip("\n").split(sep)[1:8]
            for line in lines
            if line.startswith("#;")
        ]
        data.metadata = Metadata.from_list(attribute_list)
        data.table = df
        return data

    def to_hdf5(self, filepath, **kwargs):
        """Export metadata, table and description to an hdf5 file.

        The file is always opened with mode ``w``. The three dataframes are
        stored under the keys ``metadata``, ``table`` and ``description``; an
        empty dataframe is stored as table if ``self.df`` is not a dataframe.

        :param filepath: path of the hdf5 file to write
        :param complib: default='zlib' ['zlib', 'lzo', 'bzip2', 'blosc', 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd']
        :param complevel: default=9 [0-9]
        :return: None
        """
        table = self.df if isinstance(self.df, pd.DataFrame) else pd.DataFrame()

        kwargs["mode"] = "w"
        # fill in the compression defaults unless the caller set them
        for option, fallback in (("complib", "zlib"), ("complevel", 9)):
            if kwargs.get(option) is None:
                kwargs[option] = fallback

        with pd.HDFStore(filepath, **kwargs) as hdf:
            hdf.put('metadata',
                    self.metadata.df,
                    format='fixed',
                    data_columns=True)
            hdf.put('table', table, format='fixed', data_columns=True)
            hdf.put('description',
                    self.description_to_df(),
                    format='fixed',
                    data_columns=True)

    @classmethod
    def metadata_from_hdf5(cls, filepath, **kwargs):
        """Import only the metadata from an hdf5 file.

        :param filepath: path of the hdf5 file
        :param kwargs: accepted for interface compatibility, not evaluated
        :return: Metadata built from the ``/metadata`` entry of the file, or
            None if the file does not exist
        """
        if not os.path.exists(filepath):
            logger.error("hdf5 file '{}' not available".format(filepath))
            return None

        # the store is only read, so it is opened read-only
        with pd.HDFStore(filepath, mode="r") as hdf:
            df_metadata = hdf.get("/metadata")
            metadata = Metadata.from_dataframe(df_metadata)
            return metadata

    @classmethod
    def from_hdf5(cls, filepath, **kwargs):
        """Import an object from an hdf5 file.

        Reads the entries ``/metadata``, ``/table`` and ``/description``.

        :param filepath: path of the hdf5 file
        :param kwargs: accepted for interface compatibility, not evaluated
        :return: ``Data`` instance, or None if the file does not exist
        """
        if not os.path.exists(filepath):
            logger.error("hdf5 file '{}' not available".format(filepath))
            return None

        # the store is only read, so it is opened read-only
        with pd.HDFStore(filepath, mode="r") as hdf:
            df_metadata = hdf.get("/metadata")
            df_table = hdf.get("/table")
            df_description = hdf.get("/description")
            metadata = Metadata.from_dataframe(df_metadata)
            # NOTE(review): builds a plain ``Data`` even when called on a
            # subclass -- confirm whether ``cls`` should be used instead
            data = Data(metadata=metadata, table=df_table)
            data.description_from_df(df_description)

        return data

    def to_html(self, filepath, xlsx=True, style=None):
        """export Data to html

        :param filepath:
        :param xlsx:
        :param style:
        :return:
        """

        table_values = self.df.head()
        table_headers = self.df.columns

        table_description_values = self.df.describe()
        table_description_headers = self.df.describe().columns

        metadata_values = self.metadata.df.head().values
        metadata_headers = self.metadata.df.columns

        if xlsx is True:
            xlsx_tag = self.get_download_link()
        else:
            xlsx_tag = ""

        param = {
            "title":
            "{0} [{1}]".format(self.osname, self.uuid),
            "description":
            self.description,
            "metadata":
            tabulate(metadata_values, metadata_headers, tablefmt="html"),
            "table":
            tabulate(table_values, table_headers, tablefmt="html"),
            "table_description":
            tabulate(table_description_values,
                     table_description_headers,
                     tablefmt="html"),
            "xlsx_tag":
            xlsx_tag,
            "sdata":
            "created with sdata v{}.".format(__version__),
            "now":
            "{}".format(now_utc_str()),
        }

        tmpl = """<!DOCTYPE html>
<html lang="de">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{title}</title>
    <style>
    h1 {{ 
       background-color: #00FFFF77;
       color: black;
    }}
    h2 {{ 
       background-color: #00FFFF44;
       color: black;
    }}
    h3 {{ 
       background-color: #00FFFF11;
       color: black;
    }}
    p {{
    	color: black;
    }}
    table, th, td, caption {{
    border: 1px solid #a0a0a0;
    }}
    
    table {{
      border-collapse: collapse;
      border-spacing: 1em;
      border-width: thin 0 0 thin;
      margin: 0 0 1em;
      table-layout: auto;
      max-width: 100%;
      text-align: right;
    }}
    th, td {{
      font-weight: normal;
      text-align: left;
      border-spacing: 1em;
      padding: .1em .3em;
    }}
    th, caption {{
      background-color: #f1f3f4;
      font-weight: 700;
    }}
    </style>
  </head>
  <body>
    <h1>{title}</h1>
    <h2>Download</h2>
    <p">{xlsx_tag}</p>
    <h2>Description</h2>
    <p>{description}</p>
    <h2>Metadata</h2>
    {metadata}
    <h2>Table</h2>
    {table}
    <h3>Table Description</h3>
    {table_description}
    <p>{sdata}</p>
    <p>{now}</p>
  </body>
</html>""".format(**param)
        try:
            with open(filepath, "w") as fh:
                fh.write(tmpl)
        except Exception as exp:
            raise

    def copy(self, **kwargs):
        """Return a deep copy of the object that remembers its origin.

        The copy stores the uuid of the source object in its
        ``SDATA_PARENT`` attribute, receives a newly generated
        ``SDATA_UUID`` and an ``SDATA_MTIME`` set to the current utc time.

        .. code-block:: python

            data = sdata.Data(name="data", uuid="38b26864e7794f5182d38459bab85842", description="this is remarkable")
            datac = data.copy()
            print("data  {0.uuid}".format(data))
            print("datac {0.uuid}".format(datac))

        .. code-block::

            data  38b26864e7794f5182d38459bab85842
            datac 2c4eb15900af435d8cd9c8573ca777e2

        :param uuid: optional, uuid to assign to the copy
        :param name: optional, name to assign to the copy
        :return: Data
        """
        duplicate = copy.deepcopy(self)
        duplicate.metadata.add(self.SDATA_PARENT, self.uuid)
        duplicate.metadata.add(self.SDATA_UUID, self.gen_uuid())
        duplicate.metadata.add(self.SDATA_MTIME, now_utc_str(), dtype="str")

        # explicit overrides win over the generated values
        for option in ("uuid", "name"):
            if option in kwargs:
                setattr(duplicate, option, kwargs.get(option))

        logger.debug(f"make copy of {self.uuid} -> {duplicate.uuid}")

        return duplicate

    def gen_uuid(self):
        """Create a new random uuid.

        :return: str, 32 hex digits, e.g. '5fa04a3738e4431dbc34eccea5e795c4'
        """
        fresh = uuid.uuid4()
        return fresh.hex

    def refactor(self, fix_columns=True, add_table_metadata=True):
        """Clean up the table column names and describe the columns in the metadata.

        Nothing is done unless ``self.table`` is a pandas DataFrame. Every
        column name is passed to ``extract_name_unit``, which yields a
        ``(name, unit)`` pair (presumably it splits a label such as
        ``"force [kN]"`` -- TODO confirm against the helper).

        :param fix_columns: if True, rename each table column to the
            extracted name
        :param add_table_metadata: if True, relabel the metadata attribute
            stored under the old column name to the extracted name, or add a
            new attribute (dtype "float") if there is none
        """
        if isinstance(self.table, pd.DataFrame):
            # old column name -> cleaned column name
            mapper = {}
            for old_colname in self.table.columns:
                name, unit = extract_name_unit(old_colname)
                if fix_columns:
                    mapper[old_colname] = name
                if add_table_metadata:
                    old_attr = self.metadata.get(old_colname)
                    if old_attr:
                        # attribute exists already: keep it, only relabel it
                        logger.info("skip: {}".format(old_attr))
                        self.metadata.relabel(old_colname, name)
                    else:
                        # no attribute yet: the original column name is kept
                        # as description of the new attribute
                        self.metadata.add(name=name,
                                          description=old_colname,
                                          unit=unit,
                                          dtype="float")
            # mapper stays empty if fix_columns is False, so nothing is renamed
            self.table.rename(columns=mapper, inplace=True)