class Extract(): def __init__(self, path="superstore.hyper"): super().__init__() self._path = Path(__file__).parent / path self._table_name = TableName("Extract", "Extract") self._hyper = HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) self._connection = Connection( endpoint=self._hyper.endpoint, database=self._path) def __del__(self): self._connection.close() self._hyper.close() def delete_data(self, date): row_count = self._connection.execute_command( command= f"DELETE FROM {self._table_name} WHERE order_date>='{date}'" ) print(f"The number of deleted rows in table {self._table_name} " f"is {row_count}.\n") def read_extract(self): print(f"These are all rows in the table {self._table_name}:") rows_in_table = self._connection.execute_list_query(query=f"SELECT * FROM {self._table_name}") print(rows_in_table) def insert_data(self, data): with Inserter(self._connection, self._table_name) as inserter: inserter.add_rows(rows=data) inserter.execute()
def __init__(self, path="superstore.hyper"): super().__init__() self._path = Path(__file__).parent / path self._table_name = TableName("Extract", "Extract") self._hyper = HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) self._connection = Connection( endpoint=self._hyper.endpoint, database=self._path)
def open_connection(self): """ Open the connection to the Tableau Hyper file and the database """ self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) self.connection = Connection(self.hyper.endpoint, self.path_to_hyper) logger.info("Opened the connection to Tableau Hyper file")
def create_schema(self, schema_dss, destination_file_path): """ Read the Tableau Hyper file an. :param schema_dss: DSS schema from the DSS dataset to export example: [{"columns": [{"name": "customer_id", "type": "bigint"}, ...]}, ...] :param destination_file_path: :return: """ # Read the destination file of the dss self.output_file = destination_file_path logger.info( "Writing the Tableau Hyper file to the following location: {}". format(destination_file_path)) logger.info( "The dataset to export has the following schema: {}".format( schema_dss)) dss_columns = schema_dss['columns'] dss_storage_types = [ column_descriptor['type'] for column_descriptor in dss_columns ] self.schema_converter.set_dss_storage_types(dss_storage_types) self.is_geo_table = dss_is_geo(schema_dss) logger.info("The input dataset contains a geo column: {}".format( self.is_geo_table)) if not self.schema_name or not self.table_name: logger.warning("Did not received the table or schema name.") raise ValueError("No valid schema or table name received.") logger.info("Received target schema {} and table {}".format( self.schema_name, self.table_name)) # Create the Tableau Hyper schema from the DSS schema self.output_table_definition = TableDefinition( TableName(self.schema_name, self.table_name), self.schema_converter.dss_columns_to_hyper_columns(dss_columns)) # Open connection to file self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) self.connection = Connection(self.hyper.endpoint, self.output_file, CreateMode.CREATE_AND_REPLACE) assert self.connection is not None self.connection.catalog.create_schema(self.schema_name) self.connection.catalog.create_table(self.output_table_definition) # Handle the geo case if self.is_geo_table: logger.info("Detected geo column. Creating a temporary table...") dss_tmp_schema = geo_to_text(schema_dss) dss_tmp_columns = dss_tmp_schema['columns'] self.tmp_table_definition = TableDefinition( TableName(self.schema_name, "tmp_" + self.table_name), self.schema_converter.dss_columns_to_hyper_columns( dss_tmp_columns)) self.connection.catalog.create_table(self.tmp_table_definition) logger.info("Created temporary table")
def test_hyper_columns_to_dss_columns(self): schema_converter = SchemaConversion() path_to_hyper = "data/superstore_sample.hyper" hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) connection = Connection(hyper.endpoint, path_to_hyper) hyper_table = connection.catalog.get_table_definition( TableName('public', 'Customer')) connection.close() hyper.close() dss_columns = schema_converter.hyper_columns_to_dss_columns( hyper_table.columns) return True
def insert_data(): """ Creates a simple .hyper file. For more on this, see the below example: https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py """ print("Creating single table for conversion.") # Starts the Hyper Process with telemetry enabled to send data to Tableau. # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU. with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: # Creates new Hyper file # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists. with Connection( endpoint=hyper.endpoint, database=path_to_database, create_mode=CreateMode.CREATE_AND_REPLACE) as connection: # Creates schema and table with table_definition. connection.catalog.create_schema( schema=extract_table.table_name.schema_name) connection.catalog.create_table(table_definition=extract_table) # The rows to insert into the "Extract"."Extract" table. data_to_insert = [["DK-13375", "Dennis Kane", 685, "Consumer"], ["EB-13705", "Ed Braxton", 815, "Corporate"]] # Insert the data. with Inserter(connection, extract_table) as inserter: inserter.add_rows(rows=data_to_insert) inserter.execute() print("The connection to the Hyper file has been closed.") print("The Hyper process has been shut down.")
def run_create_hyper_file_from_csv(): path_to_database = Path("Workbook_Info.hyper") process_parameters = { "log_file_max_count": "2", "log_file_size_limit": "100M" } with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters=process_parameters) as hyper: connection_parameters = {"lc_time": "en_US"} with Connection(endpoint=hyper.endpoint, database=path_to_database, create_mode=CreateMode.CREATE_AND_REPLACE, parameters=connection_parameters) as connection: connection.catalog.create_table(table_definition=Workbook_Info_table) path_to_csv = str(Path(__file__).parent / "workbook_info.csv") count_in_Workbook_Info_table = connection.execute_command( command=f"COPY {Workbook_Info_table.table_name} from {escape_string_literal(path_to_csv)} with " f"(format csv, NULL 'NULL', delimiter ',', header)")
def to_hyper(df, hyper_file_name, custom_schema="Extract", custom_table_name="Extract"): """ Write a Tableau Hyper file from a Pandas DataFrame. Currently can only write single table extracts, which is Tableau's default way of creating an extract. Args: df: Specify which DataFrame you want to output hyper_file_name: Specify the file name such as "Example.hyper" custom_schema: If you need to change the schema name. Defaults to "Extract" custom_table_name: If you need to change the schema name. Defaults to "Extract" Returns: Tableau Hyper file """ # Starts the Hyper Process with HyperProcess( telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters={"log_config": ""}, ) as hyper: # Creates a .hyper file to put the data into with Connection(hyper.endpoint, hyper_file_name, CreateMode.CREATE_AND_REPLACE) as connection: connection.catalog.create_schema(custom_schema) # create a .hyper compatible column definition # from pd DataFrame column names and dtypes # using 3 list comprehensions to loop through # all the columns in the DataFrame column_names = [column for column in df.columns] column_dtype = [dtype for dtype in df.dtypes] hyper_table = TableDefinition( TableName(custom_schema, custom_table_name), [ TableDefinition.Column( column_names[column], dtype_mapper[str( column_dtype[column])]) for column in range(len(column_names)) ], ) connection.catalog.create_table(hyper_table) # Repace NaN with None, otherwise it will not be Null in Tableau df.replace({np.nan: None}, inplace=True) # Insert the data values into the hyper file data_to_insert = df.to_numpy() with Inserter(connection, hyper_table) as inserter: inserter.add_rows(tqdm((row for row in data_to_insert))) inserter.execute()
def run_create_hyper_file_from_csv(): """ Loading data from a csv into a new Hyper file """ print("Load data from CSV into table in new Hyper file") # Starts the Hyper Process with telemetry enabled to send data to Tableau. # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU. with HyperProcess( telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: with Connection( endpoint=hyper.endpoint, database=cf.hyper_file_path, create_mode=CreateMode.CREATE_AND_REPLACE) as connection: connection.catalog.create_table(table_definition=customer_table) # `execute_command` executes a SQL statement and returns the impacted row count. count_in_customer_table = connection.execute_command( command= f"COPY {customer_table.table_name} from {escape_string_literal(cf.input_file_path)} with " f"(format csv, NULL 'NULL', delimiter ',', header)") print( f"The number of rows in table {customer_table.table_name} is {count_in_customer_table}." ) print("The connection to the Hyper file has been closed.") print("The Hyper process has been shut down.")
def get_table_columns(table): """ Returns the list of the columns in the table. This is a list of dictionaries having the following keys: * name: the Tableau caption of the column (when found) * type: the data type contained in that column * nullability: a flag indicating if the column may contain null values """ return_value = [] columns_mappings = get_columns_mappings() with HyperProcess( telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: with Connection(endpoint=hyper.endpoint, database=settings.HYPER_FILE_PATH) as connection: table_definition = connection.catalog.get_table_definition( name=table) for column in table_definition.columns: name = str(column.name).replace('"', "") if name in columns_mappings.keys(): name = columns_mappings[name] return_value.append({ "name": name, "type": str(column.type), "nullability": str(column.nullability), }) return return_value
def append(self, sql: str = None, df: DataFrame = None): """Appends new data to a Hyper File""" # First, must materialize the new data back to the driver node if sql is not None and df is None: self.sql = sql self.df = get_spark_dataframe(sql) elif sql is None and df is not None: self.df = df else: raise ValueError( 'Missing either SQL statement or Spark DataFrame as argument.') data = HyperUtils.get_rows(self.df) # Convert the Spark DataFrame schema to a Tableau Table Def table_def = HyperUtils.get_table_def(self.df, "Extract", "Extract") # Insert, the new data into Hyper File hyper_database_path = self.path logging.info( f'Inserting new data into Hyper database: {hyper_database_path}') with HyperProcess( telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp: with Connection(endpoint=hp.endpoint, database=hyper_database_path, create_mode=CreateMode.NONE) as connection: with Inserter(connection, table_def) as inserter: inserter.add_rows(rows=data) inserter.execute()
def _read_table(*, connection: tab_api.Connection, table: TableType) -> pd.DataFrame: if isinstance(table, str): table = tab_api.TableName(table) table_def = connection.catalog.get_table_definition(table) columns = table_def.columns dtypes: Dict[str, str] = {} for column in columns: column_type = pantab_types._ColumnType(column.type, column.nullability) dtypes[column.name.unescaped] = _tableau_to_pandas_type(column_type) with connection.execute_query(f"SELECT * from {table}") as result: df = pd.DataFrame(result) df.columns = dtypes.keys() # The tableauhyperapi.Timestamp class is not implicitly convertible to a datetime # so we need to run an apply against applicable types for key, val in dtypes.items(): if val == "datetime64[ns]": df[key] = df[key].apply(lambda x: x._to_datetime()) elif val == "datetime64[ns, UTC]": df[key] = df[key].apply(lambda x: x._to_datetime()).dt.tz_localize( "UTC") elif val == "timedelta64[ns]": df[key] = df[key].apply(_interval_to_timedelta) df = df.astype(dtypes) df = df.fillna(value=np.nan) # Replace any appearances of None return df
def run_create_hyper_file_from_csv(): print("Inside Fucntion to pick data from CSV into table in new Hyper file") with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists with Connection( endpoint=hyper.endpoint, database='C:/Users/admin/Desktop/extrp1.hyper', create_mode=CreateMode.CREATE_AND_REPLACE) as connection: connection.catalog.create_schema('extract') connection.catalog.create_table(table_definition=extract_table) # create a path that locates CSV file to be used path_to_csv = 'C:/Users/admin/Desktop/testing1.csv' # `execute_command` executes a SQL statement and returns the impacted row count. count_in_table = connection.execute_command( command= f"COPY {extract_table.table_name} from {escape_string_literal(path_to_csv)} with " f"(format csv, NULL 'NULL', delimiter ',', header)") print(count_in_table) with Inserter(connection, TableName('extract', 'extract')) as inserter: inserter.add_rows(rows=data_to_insert) inserter.execute()
def fn_hyper_handle(self, in_logger, timer, in_dict): timer.start() out_data_frame = None try: # Starts Hyper Process with telemetry enabled/disabled to send data to Tableau or not # To opt in, simply set telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU. # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU. telemetry_chosen = Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU with HyperProcess(telemetry=telemetry_chosen) as hyper_process: in_logger.debug( self.locale.gettext('Hyper engine process initialized')) in_logger.debug( self.locale.gettext( 'Chosen Telemetry is {telemetry_value}').replace( '{telemetry_value}', str(telemetry_chosen))) timer.stop() timer.start() hyper_create_mode = { 'append': CreateMode.NONE, 'overwrite': CreateMode.CREATE_AND_REPLACE, 'read': CreateMode.NONE, 'delete': CreateMode.NONE, 'update': CreateMode.NONE, } # Connect to an existing .hyper file with Connection(endpoint=hyper_process.endpoint, database=in_dict['hyper file'], create_mode=hyper_create_mode.get( in_dict['action'])) as hyper_connection: in_logger.debug( self.locale.gettext( 'Connection to the Hyper engine using file name "{file_name}" ' + 'has been established').replace( '{file_name}', in_dict['hyper file'])) timer.stop() in_dict['connection'] = hyper_connection if in_dict['action'] == 'read': out_data_frame = self.fn_hyper_read( in_logger, timer, in_dict) elif in_dict['action'] in ('append', 'overwrite'): self.fn_write_data_into_hyper_file( in_logger, timer, in_dict) elif in_dict['action'] in ('delete', 'update'): self.fn_delete_data_from_hyper(in_logger, timer, in_dict) self.fn_get_records_count_from_table( in_logger, timer, in_dict) timer.start() hyper_connection.close() in_logger.info( self.locale.gettext( 'Connection to the Hyper engine file has been closed')) in_logger.info( self.locale.gettext('Hyper engine process has been shut down')) timer.stop() except HyperException as ex: in_logger.error(str(ex).replace(chr(10), ' ')) timer.stop() exit(1) return out_data_frame
def HyperCreate(): print(">>> Creating Hyper File <<<") path_to_database = Path(hyperfile) with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: with Connection(endpoint=hyper.endpoint,database=path_to_database,create_mode=CreateMode.CREATE_IF_NOT_EXISTS) as connection: connection.execute_command(command= f'''create table if not exists http ( serving_host text, client_host text, username text, ts text, timezone text, port text, request_body text, xforward_for text, status_code text, response_size text, content_length text, request_time_ms text, request_id text );''') connection.execute_command(command= f'''create table if not exists dump_table ( dump text );''')
def _create_schema(self): with HyperProcess( telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: with Connection(hyper.endpoint, self.hyper, CreateMode.CREATE_AND_REPLACE) as connection: for table in self.tables: connection.catalog.create_table(table)
def get_tables(schema: str, hyper_file_path: str): with HyperProcess( telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp: with Connection(endpoint=hp.endpoint, database=hyper_file_path) as connection: catalog = connection.catalog # Query the Catalog API for all tables under the given schema return catalog.get_table_names(SchemaName(schema))
def get_row_count(schema: str, table: str, hyper_file_path: str): with HyperProcess( telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp: with Connection(endpoint=hp.endpoint, database=hyper_file_path) as connection: # Query the Hyper File for the number of rows in the table return connection.execute_scalar_query( f"SELECT COUNT(*) FROM {TableName(schema, table)}")
class Hyper(System): def __init__(self, filename): self.db = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) self.conn = Connection(self.db.endpoint, filename, CreateMode.CREATE) def create(self, ddl): count = self.conn.execute_command(ddl) return [count] def load(self, filename): count = self.conn.execute_command("COPY logs FROM '" + filename + "' WITH (FORMAT csv, HEADER)") return [count] def query(self, sql): #schema = result.schema() return self.conn.execute_query(sql)
def test_export_date_values(self): """ Test the export of geo values (DSS geopoint storage type) :return: """ nan = float("nan") # ===> Define parameters input from DSS for exporter config = {} plugin_config = {} schema = { 'columns': [{ 'name': 'price', 'type': 'double' }, { 'name': 'last_review_parsed', 'type': 'date' }], 'userModified': True } rows = [(149.0, Timestamp('2018-10-19 13:20:50.349000')), (225.0, Timestamp('2019-05-21 00:00:00')), (150.0, NaT), (80.0, NaT)] # <=== # ===> Create a DSS-like exporter exporter = TableauHyperExporter(config, plugin_config) output_file_name = get_random_alphanumeric_string(10) + '.hyper' destination_file_path = os.path.join(self.output_path, output_file_name) exporter.open_to_file(schema, destination_file_path) for row in rows: exporter.write_row(row) exporter.close() with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU ) as hyper: with Connection(endpoint=hyper.endpoint, database=destination_file_path) as connection: with connection.execute_query( query=f"SELECT * FROM {TableName('Extract', 'Extract')}" ) as result: rows_from_hyper = list(result) assert len(rows) == len(rows_from_hyper) count = 0 valid = 0 for i in range(len(rows)): row_dss, row_hyper = rows[i], rows_from_hyper[i] for j in range(len(row_dss)): a, b = row_dss[j], row_hyper[j] if (pd.isna(a) and pd.isna(b)) or (a == b): valid += 1 count += 1 os.remove(destination_file_path)
def test_to_dss_date(self): schema_converter = SchemaConversion() path_to_hyper = "data/superstore_sample.hyper" hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) connection = Connection(hyper.endpoint, path_to_hyper) hyper_table = TableName('public', 'Orders') hyper_table_def = connection.catalog.get_table_definition(hyper_table) result = connection.execute_query(f'SELECT * FROM {hyper_table}') for row in result: pass sample_date = row[2].to_date() dss_date = datetime.datetime(sample_date.year, sample_date.month, sample_date.day) connection.close() hyper.close() dss_columns = schema_converter.hyper_columns_to_dss_columns( hyper_table_def.columns) return True
def hyperfile_to_df(hyper_path): print(hyper_path) with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: with Connection(endpoint=hyper.endpoint,database=hyper_path) as connection: print(connection.catalog.get_table_names(schema="Extract")) tablename = connection.catalog.get_table_names(schema="Extract")[0] table_definition = connection.catalog.get_table_definition(name=QualifiedName("Extract",tablename)) rows_in_table = connection.execute_result_list(query=f"SELECT * FROM {QualifiedName('Extract',tablename)}")#f"SELECT * FROM {escape_name(tablename)}") df = pd.DataFrame(columns=[c.name for c in table_definition.columns],data=rows_in_table) return df
def test_export_int_values(self): # ===> Define parameters input from DSS for exporter config = {} plugin_config = {} schema = { 'columns': [{ 'name': 'tiny_int', 'type': 'tinyint' }, { 'name': 'small_int', 'type': 'smallint' }, { 'name': 'big_int', 'type': 'bigint' }], 'userModified': True } rows = [(12, 370, -9223372036854775808), (2, 1000, 2147483647), (15, 15000, 21474836477777777), (126, 32766, 2147483648), (127, 32767, 2147483648)] # <=== # ===> Create a DSS-like exporter exporter = TableauHyperExporter(config, plugin_config) output_file_name = get_random_alphanumeric_string(10) + '.hyper' destination_file_path = os.path.join(self.output_path, output_file_name) exporter.open_to_file(schema, destination_file_path) for row in rows: exporter.write_row(row) exporter.close() with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU ) as hyper: with Connection(endpoint=hyper.endpoint, database=destination_file_path) as connection: with connection.execute_query( query=f"SELECT * FROM {TableName('Extract', 'Extract')}" ) as result: rows_from_hyper = list(result) assert len(rows) == len(rows_from_hyper) count = 0 valid = 0 for i in range(len(rows)): row_dss, row_hyper = rows[i], rows_from_hyper[i] for j in range(len(row_dss)): a, b = row_dss[j], row_hyper[j] if (pd.isna(a) and pd.isna(b)) or (a == b): valid += 1 count += 1 os.remove(destination_file_path)
def run_update_data_in_existing_hyper_file(): """ An example of how to update data in an existing Hyper file. """ print("EXAMPLE - Update existing data in an Hyper file") # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table. # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema. path_to_source_database = Path( __file__).parent / "data" / "superstore_sample.hyper" # Make a copy of the superstore sample Hyper file. path_to_database = Path( shutil.copy(path_to_source_database, "superstore_sample_update.hyper")).resolve() # Starts the Hyper Process with telemetry enabled to send data to Tableau. # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU. with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: # Connect to existing Hyper file "superstore_sample_update.hyper". with Connection(endpoint=hyper.endpoint, database=path_to_database) as connection: rows_pre_update = connection.execute_list_query( query= f"SELECT {escape_name('Loyalty Reward Points')}, {escape_name('Segment')}" f"FROM {escape_name('Customer')}") print( f"Pre-Update: Individual rows showing 'Loyalty Reward Points' and 'Segment' " f"columns: {rows_pre_update}\n") print( "Update 'Customers' table by adding 50 Loyalty Reward Points to all Corporate Customers." ) row_count = connection.execute_command( command=f"UPDATE {escape_name('Customer')} " f"SET {escape_name('Loyalty Reward Points')} = {escape_name('Loyalty Reward Points')} + 50 " f"WHERE {escape_name('Segment')} = {escape_string_literal('Corporate')}" ) print( f"The number of updated rows in table {escape_name('Customer')} is {row_count}" ) rows_post_update = connection.execute_list_query( query= f"SELECT {escape_name('Loyalty Reward Points')}, {escape_name('Segment')} " f"FROM {escape_name('Customer')}") print( f"Post-Update: Individual rows showing 'Loyalty Reward Points' and 'Segment' " f"columns: {rows_post_update}") print("The connection to the Hyper file has been closed.") print("The Hyper process has been shut down.")
def delete(self, where_condition): # Start Hyper with HyperProcess( telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: # Connect to an existing .hyper file (CreateMode.NONE) with Connection(endpoint=hyper.endpoint, database=self.path) as connection: delete_command = 'DELETE FROM "Extract"."Extract" WHERE {}'.format( where_condition) row_count = connection.execute_command(command=delete_command) logging.info(f'{delete_command}, row_count={row_count}')
def get_table_rows(table): """ Returns a list of list representing all the rows in the table """ with HyperProcess( telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: with Connection(endpoint=hyper.endpoint, database=settings.HYPER_FILE_PATH) as connection: rows_in_table = connection.execute_list_query( query=f"SELECT * FROM {table}") return rows_in_table
def run_delete_data_in_existing_hyper_file(): """ An example of how to delete data in an existing Hyper file. """ print("EXAMPLE - Delete data from an existing Hyper file") # Path to a Hyper file containing all data inserted into Customer, Product, Orders and LineItems table. # See "insert_data_into_multiple_tables.py" for an example that works with the complete schema. path_to_source_database = Path( __file__).parent / "data" / "superstore_sample.hyper" # Make a copy of the superstore example Hyper file. path_to_database = Path( shutil.copy(path_to_source_database, "superstore_sample_delete.hyper")).resolve() # Starts the Hyper Process with telemetry enabled to send data to Tableau. # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU. with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: # Connect to existing Hyper file "superstore_sample_delete.hyper". with Connection(endpoint=hyper.endpoint, database=path_to_database) as connection: print( f"Delete all rows from customer with the name 'Dennis Kane' from table {escape_name('Orders')}." ) # `execute_command` executes a SQL statement and returns the impacted row count. row_count = connection.execute_command( command=f"DELETE FROM {escape_name('Orders')} " f"WHERE {escape_name('Customer ID')} = ANY(" f"SELECT {escape_name('Customer ID')} FROM {escape_name('Customer')} " f"WHERE {escape_name('Customer Name')} = {escape_string_literal('Dennis Kane')})" ) print( f"The number of deleted rows in table {escape_name('Orders')} " f"is {row_count}.\n") print( f"Delete all rows from customer with the name 'Dennis Kane' from table {escape_name('Customer')}." ) row_count = connection.execute_command( command=f"DELETE FROM {escape_name('Customer')} " f"WHERE {escape_name('Customer Name')} = {escape_string_literal('Dennis Kane')}" ) print( f"The number of deleted rows in table Customer is {row_count}." ) print("The connection to the Hyper file has been closed.") print("The Hyper process has been shut down.")
def print_rows(self): """Prints the first 1,000 rows of a Hyper file""" with HyperProcess( telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hp: with Connection(endpoint=hp.endpoint, database=self.path) as connection: rows = connection.execute_list_query( f"SELECT * FROM {TableName('Extract', 'Extract')} LIMIT 1000" ) print("Showing first 1,000 rows") for row in rows: print(row)
def write_df_to_hyper(df,hyper_path): with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: with Connection(endpoint=hyper.endpoint,database=hyper_path) as connection: tablename = connection.catalog.get_table_names(schema="Extract")[0] # Delete the old data connection.execute_command(command=f"DELETE FROM {escape_name(tablename)}") with Inserter(connection, tablename) as inserter: inserter.add_rows(rows=df.values.tolist()) inserter.execute() return hyper_file_path
def run_create_hyper_file_from_csv(): """ An example demonstrating loading data from a csv into a new Hyper file """ print("EXAMPLE - Load data from CSV into table in new Hyper file") path_to_database = Path("customer.hyper") # Optional process parameters. # They are documented in the Tableau Hyper documentation, chapter "Process Settings" # (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/processsettings.html). process_parameters = { # Limits the number of Hyper event log files to two. "log_file_max_count": "2", # Limits the size of Hyper event log files to 100 megabytes. "log_file_size_limit": "100M" } # Starts the Hyper Process with telemetry enabled to send data to Tableau. # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU. with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU, parameters=process_parameters) as hyper: # Optional connection parameters. # They are documented in the Tableau Hyper documentation, chapter "Connection Settings" # (https://help.tableau.com/current/api/hyper_api/en-us/reference/sql/connectionsettings.html). connection_parameters = {"lc_time": "en_US"} # Creates new Hyper file "customer.hyper". # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists. with Connection(endpoint=hyper.endpoint, database=path_to_database, create_mode=CreateMode.CREATE_AND_REPLACE, parameters=connection_parameters) as connection: connection.catalog.create_table(table_definition=customer_table) # Using path to current file, create a path that locates CSV file packaged with these examples. path_to_csv = str(Path(__file__).parent / "data" / "customers.csv") # Load all rows into "Customers" table from the CSV file. # `execute_command` executes a SQL statement and returns the impacted row count. count_in_customer_table = connection.execute_command( command= f"COPY {customer_table.table_name} from {escape_string_literal(path_to_csv)} with " f"(format csv, NULL 'NULL', delimiter ',', header)") print( f"The number of rows in table {customer_table.table_name} is {count_in_customer_table}." ) print("The connection to the Hyper file has been closed.") print("The Hyper process has been shut down.")