Exemplo n.º 1
0
    def compute_data_type(self, lst: LuxSQLTable):
        """
        Infer the Lux data type of every column of the SQL table behind ``lst``.

        The SQL type of each column is looked up in INFORMATION_SCHEMA.COLUMNS and
        mapped to one of "temporal", "nominal", "quantitative" or "id". The resulting
        ``{column: type}`` mapping replaces ``lst._data_type``. Columns whose SQL type
        matches none of the rules below get no entry in the mapping.

        Parameters
        ----------
        lst: lux.LuxSQLTable
            lux.LuxSQLTable object whose metadata will be calculated

        Returns
        -------
        None
        """
        nominal_sql_types = {
            "character",
            "character varying",
            "boolean",
            "uuid",
            "text",
        }
        numeric_sql_types = {
            "integer",
            "numeric",
            "decimal",
            "bigint",
            "real",
            "smallint",
            "smallserial",
            "serial",
            "double precision",
        }

        data_type = {}
        sql_dtypes = {}
        # lst.cardinality is read below; presumably populated by this call -- TODO confirm
        self.get_cardinality(lst)
        # For a dotted name, only the part after the first "." is used in the TABLE_NAME filter
        _, dot, unqualified = lst.table_name.partition(".")
        table_name = unqualified if dot else lst.table_name
        columns = list(lst.columns)
        # get the data types of the attributes in the SQL table
        for attr in columns:
            datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format(
                table_name, attr
            )
            datatype = list(pandas.read_sql(datatype_query, lux.config.SQLconnection)["data_type"])[0]

            sql_dtypes[attr] = datatype
        for attr in columns:
            # Columns literally named "month"/"year" are temporal regardless of SQL type
            if str(attr).lower() in ["month", "year"]:
                data_type[attr] = "temporal"
            elif sql_dtypes[attr] in nominal_sql_types:
                data_type[attr] = "nominal"
            elif sql_dtypes[attr] in numeric_sql_types:
                # Numeric columns with fewer than 13 distinct values are treated as categories
                if lst.cardinality[attr] < 13:
                    data_type[attr] = "nominal"
                elif check_if_id_like(lst, attr):
                    # Record in the local mapping: writing to lst._data_type here would be
                    # discarded when lst._data_type is replaced at the end of this method.
                    data_type[attr] = "id"
                else:
                    data_type[attr] = "quantitative"
            elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]:
                data_type[attr] = "temporal"
        lst._data_type = data_type
Exemplo n.º 2
0
    def compute_data_type(self, tbl: LuxSQLTable):
        """
        Classify each column of the SQL table behind ``tbl`` into a Lux data type.

        For every column the SQL type is fetched with the configured
        ``datatype_query`` template and mapped to "temporal", "nominal",
        "quantitative" or "id". The finished mapping is stored in ``tbl._data_type``;
        columns whose SQL type matches no rule are left out of it.

        Parameters
        ----------
        tbl: lux.LuxSQLTable
            lux.LuxSQLTable object whose metadata will be calculated

        Returns
        -------
        None
        """
        temporal_names = {"month", "year"}
        nominal_sql_types = {"character", "character varying", "boolean", "uuid", "text"}
        numeric_sql_types = {
            "integer",
            "numeric",
            "decimal",
            "bigint",
            "real",
            "smallint",
            "smallserial",
            "serial",
            "double",
            "double precision",
        }

        inferred = {}
        self.get_cardinality(tbl)

        # Only the part after the first "." of a dotted table name is passed to the query
        full_name = tbl.table_name
        _, dot, remainder = full_name.partition(".")
        table_name = remainder if dot else full_name

        for column in list(tbl.columns):
            # Look up the SQL type of this column
            query = lux.config.query_templates["datatype_query"].format(
                table_name=table_name, attribute=column
            )
            result = pandas.read_sql(query, lux.config.SQLconnection)
            sql_type = list(result["data_type"])[0]

            if str(column).lower() in temporal_names or "time" in sql_type or "date" in sql_type:
                inferred[column] = "temporal"
                continue
            if sql_type in nominal_sql_types:
                inferred[column] = "nominal"
                continue
            if sql_type not in numeric_sql_types:
                # Unrecognised SQL type: no entry is recorded for this column
                continue

            # Numeric column: low cardinality -> nominal, id-like -> id, else quantitative
            if tbl.cardinality[column] < 13:
                inferred[column] = "nominal"
            elif check_if_id_like(tbl, column):
                inferred[column] = "id"
            else:
                inferred[column] = "quantitative"

        tbl._data_type = inferred
Exemplo n.º 3
0
    def compute_dataset_metadata(self, tbl: LuxSQLTable):
        """
        Compute the metadata the Lux recommendation system needs for ``tbl``.

        Runs ``get_SQL_attributes`` first when ``tbl._setup_done`` is false, resets
        ``tbl._data_type`` to an empty dict, then calls ``compute_data_type`` and
        ``compute_stats`` in that order.

        Parameters
        ----------
        tbl: lux.LuxSQLTable
            lux.LuxSQLTable object whose metadata will be calculated

        Returns
        -------
        None
        """
        needs_setup = not tbl._setup_done
        if needs_setup:
            self.get_SQL_attributes(tbl)

        # NOTE: users are not expected to do much data processing against the SQL
        #       database, so this could arguably live in initialization and run once.
        tbl._data_type = dict()
        self.compute_data_type(tbl)
        self.compute_stats(tbl)