def __init__(self):
    """
    Handler for conversion of storage types between DSS and Tableau Hyper.

    DSS storage types:
        "string", "date", "geopoint", "geometry", "array", "map", "object",
        "double", "boolean", "float", "bigint", "int", "smallint", "tinyint"

    Tableau Hyper storage types:
        TypeTag.BOOL, TypeTag.BIG_INT, TypeTag.SMALL_INT, TypeTag.INT,
        TypeTag.NUMERIC, TypeTag.DOUBLE, TypeTag.OID, TypeTag.BYTES,
        TypeTag.TEXT, TypeTag.VARCHAR, TypeTag.CHAR, TypeTag.JSON,
        TypeTag.DATE, TypeTag.INTERVAL, TypeTag.TIME, TypeTag.TIMESTAMP,
        TypeTag.TIMESTAMP_TZ, TypeTag.GEOGRAPHY
    """
    # PEP 8 (E731): define a named function instead of binding a lambda.
    def handle_null(f):
        """Wrap converter *f* so null-ish values (per pd.isna) map to None."""
        return lambda x: None if pd.isna(x) else f(x)

    # Mapping DSS storage type -> (Hyper SqlType, cell-value converter).
    # Types Hyper has no direct equivalent for (array/map/object/geometry)
    # are serialized to text.
    self.mapping_dss_to_hyper = {
        'array': (SqlType.text(), handle_null(str)),
        'bigint': (SqlType.big_int(), handle_null(int)),
        'boolean': (SqlType.bool(), handle_null(bool)),
        'date': (SqlType.timestamp(), handle_null(to_hyper_timestamp)),
        'double': (SqlType.double(), handle_null(float)),
        'float': (SqlType.double(), handle_null(float)),
        'geometry': (SqlType.text(), handle_null(str)),
        'geopoint': (SqlType.geography(), handle_null(to_hyper_geography)),
        'int': (SqlType.int(), handle_null(int)),
        'map': (SqlType.text(), handle_null(str)),
        'object': (SqlType.text(), handle_null(str)),
        'smallint': (SqlType.small_int(), handle_null(int)),
        'string': (SqlType.text(), handle_null(str)),
        # DSS tinyint has no Hyper counterpart; widened to small_int.
        'tinyint': (SqlType.small_int(), handle_null(int)),
    }
    # Mapping Hyper TypeTag -> (DSS storage type, cell-value converter).
    # Tags with no DSS equivalent (interval, time, timestamp_tz, ...) fall
    # back to string.
    self.mapping_hyper_to_dss = {
        TypeTag.BIG_INT: ('bigint', handle_null(int)),
        TypeTag.BYTES: ('string', handle_null(str)),
        TypeTag.BOOL: ('boolean', handle_null(bool)),
        TypeTag.CHAR: ('string', handle_null(str)),
        TypeTag.DATE: ('date', handle_null(to_dss_date)),
        TypeTag.DOUBLE: ('double', handle_null(float)),
        TypeTag.GEOGRAPHY: ('geopoint', handle_null(to_dss_geopoint)),
        TypeTag.INT: ('int', handle_null(int)),
        TypeTag.INTERVAL: ('string', handle_null(str)),
        TypeTag.JSON: ('string', handle_null(str)),
        TypeTag.NUMERIC: ('double', handle_null(float)),
        TypeTag.OID: ('string', handle_null(str)),
        TypeTag.SMALL_INT: ('smallint', handle_null(int)),
        TypeTag.TEXT: ('string', handle_null(str)),
        TypeTag.TIME: ('string', handle_null(str)),
        TypeTag.TIMESTAMP: ('date', handle_null(to_dss_timestamp)),
        TypeTag.TIMESTAMP_TZ: ('string', handle_null(str)),
        TypeTag.VARCHAR: ('string', handle_null(str))
    }
def run(self, args):
    """
    Runs the command

    :param args: Arguments from argparse.Namespace
    """
    hyper_path = Path(args.input_file)
    print("Listing tables with spatial columns")
    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(
            telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Connects to the existing Hyper file
        with Connection(endpoint=hyper.endpoint,
                        database=hyper_path) as connection:
            catalog = connection.catalog
            # Walk every table of every schema in the input file
            for schema in catalog.get_schema_names():
                for table in catalog.get_table_names(schema=schema):
                    definition = catalog.get_table_definition(name=table)
                    row_count = connection.execute_scalar_query(
                        query=f"SELECT COUNT(*) FROM {table}")
                    geo_columns = [
                        col.name for col in definition.columns
                        if col.type == SqlType.geography()
                    ]
                    # Single print; the two original branches only differed
                    # in the trailing detail text.
                    detail = (
                        f"{len(geo_columns)} spatial columns: {geo_columns}"
                        if geo_columns else "no spatial columns")
                    print(f"Table {table} with {row_count} rows has {detail}")
NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, Name, \
    HyperException
# NOTE(review): the lines above are the tail of an import statement whose
# "from <module> import" head lies outside this chunk — presumably the
# tableauhyperapi package; confirm against the full file.

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        # Both columns are declared NOT NULL.
        TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Location', type=SqlType.geography(), nullability=NOT_NULLABLE)
    ])


def run_insert_spatial_data_to_a_hyper_file():
    """
    An example of how to add spatial data to a Hyper file.
    """
    print("EXAMPLE - Add spatial data to a Hyper file ")
    path_to_database = Path("spatial_data.hyper")
    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    # NOTE(review): the body of this "with" block continues beyond this chunk
    # and is not visible here; left truncated as in the original.
def run(self, args):
    """
    Runs the command

    :param args: Arguments from argparse.Namespace
    """
    src_path = Path(args.input_file)
    dst_path = Path(args.output_file)
    # Announce which transformation will be applied
    if args.mode == AdjustVertexOrderMode.INVERT:
        print("Reversing vertex order of polygons in spatial columns")
    else:
        print(
            "Adjusting vertex order of polygons assuming data source with "
            "flat - earth topology in spatial columns")
    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(
            telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint) as connection:
            catalog = connection.catalog
            # Create output database file, fails if the file already exists
            catalog.create_database(dst_path)
            # Attach both database files under fixed aliases
            catalog.attach_database(src_path, "input")
            catalog.attach_database(dst_path, "output")
            # Mirror every schema and table from the input file
            for schema in catalog.get_schema_names("input"):
                catalog.create_schema_if_not_exists(
                    SchemaName("output", schema.name))
                for in_table in catalog.get_table_names(schema):
                    source_definition = catalog.get_table_definition(
                        name=in_table)
                    cols = source_definition.columns
                    out_table = TableName("output", schema.name,
                                          in_table.name)
                    # Create the table in the output file with the same table
                    # definition. Note that any constraints on the table in
                    # the input file will not be present in the output table.
                    catalog.create_table(TableDefinition(out_table, cols))
                    spatial_columns = [
                        c.name for c in cols
                        if c.type == SqlType.geography()
                    ]
                    # Geography columns are routed through the vertex-order
                    # function; every other column is copied verbatim
                    select_exprs = [
                        f"geo_{args.mode.value}_vertex_order({c.name})"
                        if c.type == SqlType.geography() else f"{c.name}"
                        for c in cols
                    ]
                    # Single print; the two original branches only differed
                    # in the trailing detail text
                    detail = (
                        f"{len(spatial_columns)} spatial columns: {spatial_columns}"
                        if spatial_columns else "no spatial columns")
                    print(f"Copying table {in_table} with {detail}...")
                    row_count = connection.execute_command(
                        f"INSERT INTO {out_table} SELECT {','.join(select_exprs)} FROM {in_table}"
                    )
                    print(f" {row_count} rows copied")