Example #1
0
    def __init__(self):
        """
        Handler for conversion of storage types between DSS and Tableau Hyper.

        DSS storage types:

        "string","date","geopoint","geometry","array","map","object","double",
        "boolean","float","bigint","int","smallint","tinyint"

        Tableau Hyper storage types:

        TypeTag.BOOL, TypeTag.BIG_INT, TypeTag.SMALL_INT, TypeTag.INT, TypeTag.NUMERIC,
        TypeTag.DOUBLE, TypeTag.OID, TypeTag.BYTES, TypeTag.TEXT, TypeTag.VARCHAR, TypeTag.CHAR,
        TypeTag.JSON, TypeTag.DATE, TypeTag.INTERVAL, TypeTag.TIME, TypeTag.TIMESTAMP,
        TypeTag.TIMESTAMP_TZ, TypeTag.GEOGRAPHY
        """
        def nullable(convert):
            # Wrap a converter so missing values (NaN/None per pd.isna) pass
            # through as None instead of being fed to the converter.
            def safe_convert(value):
                return None if pd.isna(value) else convert(value)
            return safe_convert

        # DSS storage type -> (Hyper SQL type, per-value converter)
        self.mapping_dss_to_hyper = {
            'array': (SqlType.text(), nullable(str)),
            'bigint': (SqlType.big_int(), nullable(int)),
            'boolean': (SqlType.bool(), nullable(bool)),
            'date': (SqlType.timestamp(), nullable(to_hyper_timestamp)),
            'double': (SqlType.double(), nullable(float)),
            'float': (SqlType.double(), nullable(float)),
            'geometry': (SqlType.text(), nullable(str)),
            'geopoint': (SqlType.geography(), nullable(to_hyper_geography)),
            'int': (SqlType.int(), nullable(int)),
            'map': (SqlType.text(), nullable(str)),
            'object': (SqlType.text(), nullable(str)),
            'smallint': (SqlType.small_int(), nullable(int)),
            'string': (SqlType.text(), nullable(str)),
            'tinyint': (SqlType.small_int(), nullable(int)),
        }

        # Hyper TypeTag -> (DSS storage type name, per-value converter)
        self.mapping_hyper_to_dss = {
            TypeTag.BIG_INT: ('bigint', nullable(int)),
            TypeTag.BYTES: ('string', nullable(str)),
            TypeTag.BOOL: ('boolean', nullable(bool)),
            TypeTag.CHAR: ('string', nullable(str)),
            TypeTag.DATE: ('date', nullable(to_dss_date)),
            TypeTag.DOUBLE: ('double', nullable(float)),
            TypeTag.GEOGRAPHY: ('geopoint', nullable(to_dss_geopoint)),
            TypeTag.INT: ('int', nullable(int)),
            TypeTag.INTERVAL: ('string', nullable(str)),
            TypeTag.JSON: ('string', nullable(str)),
            TypeTag.NUMERIC: ('double', nullable(float)),
            TypeTag.OID: ('string', nullable(str)),
            TypeTag.SMALL_INT: ('smallint', nullable(int)),
            TypeTag.TEXT: ('string', nullable(str)),
            TypeTag.TIME: ('string', nullable(str)),
            TypeTag.TIMESTAMP: ('date', nullable(to_dss_timestamp)),
            TypeTag.TIMESTAMP_TZ: ('string', nullable(str)),
            TypeTag.VARCHAR: ('string', nullable(str))
        }
Example #2
0
    def run(self, args):
        """Run the command: print, for every table in the Hyper file, its row
        count and which of its columns (if any) are spatial (geography).

        :param args: Arguments from argparse.Namespace
        """
        input_file = Path(args.input_file)
        print("Listing tables with spatial columns")

        # Starts the Hyper Process with telemetry enabled to send data to Tableau.
        # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
        with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper, \
                Connection(endpoint=hyper.endpoint,
                           database=input_file) as connection:
            catalog = connection.catalog
            # Walk every table of every schema in the input file
            for schema in catalog.get_schema_names():
                for table in catalog.get_table_names(schema=schema):
                    definition = catalog.get_table_definition(name=table)
                    row_count = connection.execute_scalar_query(
                        query=f"SELECT COUNT(*) FROM {table}")
                    geo_columns = [column.name
                                   for column in definition.columns
                                   if column.type == SqlType.geography()]
                    if geo_columns:
                        print(f"Table {table} with {row_count} rows has"
                              f" {len(geo_columns)} spatial columns: {geo_columns}")
                    else:
                        print(f"Table {table} with {row_count} rows has no spatial columns")
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau.
# Two columns: a required text name and a required geography location.
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(
            name='Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(
            name='Location', type=SqlType.geography(), nullability=NOT_NULLABLE),
    ],
)


def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    # Target .hyper database created/used in the current working directory.
    path_to_database = Path("spatial_data.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    # NOTE(review): the function body continues beyond this excerpt; the actual
    # connection/insert logic is not visible here.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
Example #4
0
    def run(self, args):
        """Run the command: copy every table from the input Hyper file into a
        freshly created output Hyper file, passing each geography column
        through a geo_*_vertex_order SQL function to adjust polygon vertex
        order; non-spatial columns are copied verbatim.

        :param args: Arguments from argparse.Namespace; reads ``input_file``,
            ``output_file`` and ``mode`` (an AdjustVertexOrderMode).
        """
        input_file = Path(args.input_file)
        output_file = Path(args.output_file)
        if args.mode == AdjustVertexOrderMode.INVERT:
            print("Reversing vertex order of polygons in spatial columns")
        else:
            print(
                "Adjusting vertex order of polygons assuming data source with "
                "flat - earth topology in spatial columns")

        # Starts the Hyper Process with telemetry enabled to send data to Tableau.
        # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
        with HyperProcess(
                telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

            with Connection(endpoint=hyper.endpoint) as connection:
                catalog = connection.catalog

                # Create output database file, fails if the file already exists
                catalog.create_database(output_file)

                # Attach input and output database files
                catalog.attach_database(input_file, "input")
                catalog.attach_database(output_file, "output")

                # Hoisted loop invariant: the geography type compared against
                # every column is the same object each time.
                geography_type = SqlType.geography()

                # Iterate over all schemas in the input file
                for schema_name in catalog.get_schema_names("input"):

                    # Create the schema in the output file
                    catalog.create_schema_if_not_exists(
                        SchemaName("output", schema_name.name))

                    # Iterate over all tables in the input schema
                    for in_table in catalog.get_table_names(schema_name):
                        table_definition = catalog.get_table_definition(
                            name=in_table)
                        columns = table_definition.columns

                        out_table = TableName("output", schema_name.name,
                                              in_table.name)
                        out_table_definition = TableDefinition(
                            out_table, columns)

                        # Create the table in the output file with the same table definition.
                        # Note that any constraints on the table in the input file
                        # will not be present in the table in the output file
                        catalog.create_table(out_table_definition)

                        spatial_columns = [
                            c.name for c in columns
                            if c.type == geography_type
                        ]
                        # Spatial columns are routed through the vertex-order
                        # SQL function; all other columns pass through as-is.
                        in_column_modifiers = [
                            f"geo_{args.mode.value}_vertex_order({c.name})"
                            if c.type == geography_type else f"{c.name}"
                            for c in columns
                        ]

                        # Idiomatic truthiness check, consistent with the
                        # spatial-column listing command in this project.
                        if spatial_columns:
                            print(
                                f"Copying table {in_table} with {len(spatial_columns)} "
                                f"spatial columns: {spatial_columns}...")
                        else:
                            print(
                                f"Copying table {in_table} with no spatial columns..."
                            )

                        row_count = connection.execute_command(
                            f"INSERT INTO {out_table} SELECT {','.join(in_column_modifiers)} FROM {in_table}"
                        )
                        print(f"   {row_count} rows copied")