def test_get_table_def(self):
    data = [
        (1001, 1, "Jane", "Doe", "2000-05-01", 29.0, False),
        (1002, 2, "John", "Doe", "1988-05-03", 33.0, False),
        (2201, 3, "Elonzo", "Smith", "1990-05-03", 21.0, True),
        (None, None, None, None, None, None, None)  # Test nulls
    ]
    get_spark_session() \
        .createDataFrame(data, ["id", "dept_id", "first_name", "last_name", "dob", "age", "is_temp"]) \
        .createOrReplaceTempView("employees")
    df = get_spark_session().sql(
        "select id, cast(dept_id as short), first_name, "
        "last_name, dob, age, is_temp from employees")
    table_def = get_table_def(df, "Extract", "Extract")

    # Ensure that the table name matches
    assert table_def.table_name.name == Name("Extract")

    # Ensure that the TableDefinition column names match
    assert table_def.get_column(0).name == Name("id")
    assert table_def.get_column(1).name == Name("dept_id")
    assert table_def.get_column(2).name == Name("first_name")
    assert table_def.get_column(3).name == Name("last_name")
    assert table_def.get_column(4).name == Name("dob")
    assert table_def.get_column(5).name == Name("age")
    assert table_def.get_column(6).name == Name("is_temp")

    # Ensure that the column data types were converted correctly
    assert table_def.get_column(0).type == SqlType.big_int()
    assert table_def.get_column(1).type == SqlType.small_int()
    assert table_def.get_column(2).type == SqlType.text()
    assert table_def.get_column(3).type == SqlType.text()
    assert table_def.get_column(4).type == SqlType.text()
    assert table_def.get_column(5).type == SqlType.double()
    assert table_def.get_column(6).type == SqlType.bool()
def test_insert_data_into_hyper_file(self):
    data = [(1001, "Jane", "Doe"), (1002, "John", "Doe"), (2201, "Elonzo", "Smith")]
    name = "output"
    table_def = TableDefinition(
        table_name=TableName("Extract", "Extract"),
        columns=[
            TableDefinition.Column(name=Name("id"), type=SqlType.big_int(), nullability=NULLABLE),
            TableDefinition.Column(name=Name("first_name"), type=SqlType.text(), nullability=NULLABLE),
            TableDefinition.Column(name=Name("last_name"), type=SqlType.text(), nullability=NULLABLE)
        ])
    path = insert_data_into_hyper_file(data, name, table_def)
    print(f'Database Path : {path}')

    tables = TestUtils.get_tables("Extract", "/tmp/hyperleaup/output/output.hyper")
    assert len(tables) == 1

    num_rows = TestUtils.get_row_count("Extract", "Extract", "/tmp/hyperleaup/output/output.hyper")
    assert num_rows == 3
def convert_struct_field(column: StructField) -> TableDefinition.Column:
    """Converts a Spark StructField to a Tableau Hyper TableDefinition.Column"""
    if column.dataType == IntegerType():
        sql_type = SqlType.int()
    elif column.dataType == LongType():
        sql_type = SqlType.big_int()
    elif column.dataType == ShortType():
        sql_type = SqlType.small_int()
    elif column.dataType == DoubleType():
        sql_type = SqlType.double()
    elif column.dataType == FloatType():
        sql_type = SqlType.double()
    elif column.dataType == BooleanType():
        sql_type = SqlType.bool()
    elif column.dataType == DateType():
        sql_type = SqlType.date()
    elif column.dataType == TimestampType():
        sql_type = SqlType.timestamp()
    elif column.dataType == StringType():
        sql_type = SqlType.text()
    else:
        # Trap the DecimalType case
        if str(column.dataType).startswith("DecimalType"):
            # Cap precision at 18, the maximum supported by the Tableau Hyper API
            precision = column.dataType.precision if column.dataType.precision <= 18 else 18
            scale = column.dataType.scale
            sql_type = SqlType.numeric(precision, scale)
        else:
            raise ValueError(f'Invalid StructField datatype for column `{column.name}` : {column.dataType}')
    nullable = NULLABLE if column.nullable else NOT_NULLABLE
    return TableDefinition.Column(name=column.name, type=sql_type, nullability=nullable)
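Applied over a whole Spark schema, convert_struct_field yields the column list for a TableDefinition. A minimal sketch, assuming pyspark and tableauhyperapi are installed; build_table_def and the sample schema are illustrative, not part of the original module.

# Illustrative only: build_table_def and the sample schema are assumptions,
# not part of the original module.
from pyspark.sql.types import StructType, StructField, LongType, StringType, DoubleType
from tableauhyperapi import TableDefinition, TableName

def build_table_def(schema: StructType, schema_name: str = "Extract",
                    table_name: str = "Extract") -> TableDefinition:
    # Convert every Spark field to a Hyper column and assemble the table definition
    columns = [convert_struct_field(field) for field in schema.fields]
    return TableDefinition(table_name=TableName(schema_name, table_name), columns=columns)

sample_schema = StructType([
    StructField("id", LongType(), True),
    StructField("name", StringType(), True),
    StructField("score", DoubleType(), True),
])
print(build_table_def(sample_schema))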
def __init__(self):
    """
    Handler for conversion of storage types between DSS and Tableau Hyper

    DSS storage types:
        "string", "date", "geopoint", "geometry", "array", "map", "object",
        "double", "boolean", "float", "bigint", "int", "smallint", "tinyint"

    Tableau Hyper storage types:
        TypeTag.BOOL, TypeTag.BIG_INT, TypeTag.SMALL_INT, TypeTag.INT, TypeTag.NUMERIC,
        TypeTag.DOUBLE, TypeTag.OID, TypeTag.BYTES, TypeTag.TEXT, TypeTag.VARCHAR,
        TypeTag.CHAR, TypeTag.JSON, TypeTag.DATE, TypeTag.INTERVAL, TypeTag.TIME,
        TypeTag.TIMESTAMP, TypeTag.TIMESTAMP_TZ, TypeTag.GEOGRAPHY
    """
    # Wrap a converter so that NaN/None values pass through as None
    handle_null = lambda f: lambda x: None if pd.isna(x) else f(x)

    # Mapping DSS to Tableau Hyper types
    self.mapping_dss_to_hyper = {
        'array': (SqlType.text(), handle_null(str)),
        'bigint': (SqlType.big_int(), handle_null(int)),
        'boolean': (SqlType.bool(), handle_null(bool)),
        'date': (SqlType.timestamp(), handle_null(to_hyper_timestamp)),
        'double': (SqlType.double(), handle_null(float)),
        'float': (SqlType.double(), handle_null(float)),
        'geometry': (SqlType.text(), handle_null(str)),
        'geopoint': (SqlType.geography(), handle_null(to_hyper_geography)),
        'int': (SqlType.int(), handle_null(int)),
        'map': (SqlType.text(), handle_null(str)),
        'object': (SqlType.text(), handle_null(str)),
        'smallint': (SqlType.small_int(), handle_null(int)),
        'string': (SqlType.text(), handle_null(str)),
        'tinyint': (SqlType.small_int(), handle_null(int)),
    }

    # Mapping Tableau Hyper to DSS types
    self.mapping_hyper_to_dss = {
        TypeTag.BIG_INT: ('bigint', handle_null(int)),
        TypeTag.BYTES: ('string', handle_null(str)),
        TypeTag.BOOL: ('boolean', handle_null(bool)),
        TypeTag.CHAR: ('string', handle_null(str)),
        TypeTag.DATE: ('date', handle_null(to_dss_date)),
        TypeTag.DOUBLE: ('double', handle_null(float)),
        TypeTag.GEOGRAPHY: ('geopoint', handle_null(to_dss_geopoint)),
        TypeTag.INT: ('int', handle_null(int)),
        TypeTag.INTERVAL: ('string', handle_null(str)),
        TypeTag.JSON: ('string', handle_null(str)),
        TypeTag.NUMERIC: ('double', handle_null(float)),
        TypeTag.OID: ('string', handle_null(str)),
        TypeTag.SMALL_INT: ('smallint', handle_null(int)),
        TypeTag.TEXT: ('string', handle_null(str)),
        TypeTag.TIME: ('string', handle_null(str)),
        TypeTag.TIMESTAMP: ('date', handle_null(to_dss_timestamp)),
        TypeTag.TIMESTAMP_TZ: ('string', handle_null(str)),
        TypeTag.VARCHAR: ('string', handle_null(str))
    }
def createHyperTable():
    columns = getConfig()
    page_table = TableDefinition(table_name="Page")
    for column in columns:
        if column['DataType'] == "String":
            dt = SqlType.text()
        elif column['DataType'] == "Date":
            dt = SqlType.date()
        else:
            dt = SqlType.big_int()
        page_table.add_column(column['ColumnName'], dt)
    return page_table
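createHyperTable expects getConfig() to return a list of column descriptors keyed by 'ColumnName' and 'DataType'. A minimal sketch of such a config, with purely illustrative column names:

# Hypothetical stand-in for getConfig(); the column names are illustrative only.
def getConfig():
    return [
        {"ColumnName": "PageName", "DataType": "String"},
        {"ColumnName": "PublishDate", "DataType": "Date"},
        {"ColumnName": "ViewCount", "DataType": "Int"},  # any other value maps to big_int()
    ]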
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    '''
    # Replace NaNs with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in the pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text()
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)
            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)
                # Use dtype_str to look up the appropriate SqlType from dtype_map
                # and append a new column to the table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))

            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                inserter.execute()
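A short usage sketch for df_to_extract, assuming pandas and the tableauhyperapi imports used above are in scope; the sample frame and output path are illustrative.

# Illustrative usage; the sample frame and output path are assumptions.
import pandas as pd

sample = pd.DataFrame({
    "region": ["East", "West"],      # object  -> SqlType.text()
    "units": [120, 85],              # int64   -> SqlType.big_int()
    "revenue": [1999.50, 1420.75],   # float64 -> SqlType.double()
})
df_to_extract(sample, "sales_extract.hyper")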
def fn_convert_to_hyper_types(given_type):
    switcher = {
        'empty': SqlType.text(),
        'bool': SqlType.bool(),
        'int': SqlType.big_int(),
        'float-dot': SqlType.double(),
        'date-YMD': SqlType.date(),
        'date-MDY': SqlType.date(),
        'date-DMY': SqlType.date(),
        'time-24': SqlType.time(),
        'time-12': SqlType.time(),
        'datetime-24-YMD': SqlType.timestamp(),
        'datetime-12-MDY': SqlType.timestamp(),
        'datetime-24-DMY': SqlType.timestamp(),
        'str': SqlType.text()
    }
    # Fall back to text for any unrecognised type label
    return switcher.get(given_type, SqlType.text())
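A short usage sketch for fn_convert_to_hyper_types, mapping a handful of inferred field types to Hyper columns; the field names and type labels below are illustrative.

# Illustrative usage; the field names and inferred type labels are assumptions.
from tableauhyperapi import TableDefinition, NULLABLE

inferred = {"order_id": "int", "order_date": "date-YMD", "amount": "float-dot", "note": "str"}
columns = [
    TableDefinition.Column(name, fn_convert_to_hyper_types(type_label), NULLABLE)
    for name, type_label in inferred.items()
]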
def _hyper_sql_type(self, source_column):
    """
    Finds the correct Hyper column type for source_column

    source_column (obj): Source column (Instance of google.cloud.bigquery.schema.SchemaField)

    Returns a tableauhyperapi.SqlType Object
    """
    source_column_type = source_column.field_type
    return_sql_type = {
        "BOOL": SqlType.bool(),
        "BYTES": SqlType.bytes(),
        "DATE": SqlType.date(),
        "DATETIME": SqlType.timestamp(),
        "INT64": SqlType.big_int(),
        "INTEGER": SqlType.int(),
        "NUMERIC": SqlType.numeric(18, 9),
        "FLOAT64": SqlType.double(),
        "STRING": SqlType.text(),
        "TIME": SqlType.time(),
        "TIMESTAMP": SqlType.timestamp_tz(),
    }.get(source_column_type)

    if return_sql_type is None:
        error_message = "No Hyper SqlType defined for BigQuery source type: {}".format(
            source_column_type
        )
        logger.error(error_message)
        raise LookupError(error_message)

    logger.debug(
        "Translated source column type {} to Hyper SqlType {}".format(
            source_column_type, return_sql_type
        )
    )
    return return_sql_type
TableDefinition.Column(name="Address ID", type=SqlType.small_int(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Customer ID", type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Order Date", type=SqlType.date(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Order ID", type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Ship Date", type=SqlType.date(), nullability=NULLABLE), TableDefinition.Column(name="Ship Mode", type=SqlType.text(), nullability=NULLABLE) ] ) customer_table = TableDefinition( # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace. table_name="Customer", columns=[ TableDefinition.Column(name="Customer ID", type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Customer Name", type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Loyalty Reward Points", type=SqlType.big_int(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Segment", type=SqlType.text(), nullability=NOT_NULLABLE) ] ) products_table = TableDefinition( # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace. table_name="Products", columns=[ TableDefinition.Column(name="Category", type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Product ID", type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Product Name", type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name="Sub-Category", type=SqlType.text(), nullability=NOT_NULLABLE) ] )
with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:

    # Creates a new Hyper file or
    # replaces the file if it already exists.
    with Connection(endpoint=hyper.endpoint,
                    database="drinks.hyper",
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        # The default schema name is "public".

        # Define the table
        drinks_table = TableDefinition(
            table_name="drinks",
            columns=[
                TableDefinition.Column("country", SqlType.text(), NOT_NULLABLE),
                TableDefinition.Column("beer_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("spirit_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("wine_servings", SqlType.big_int(), NOT_NULLABLE),
                TableDefinition.Column("total_litres_of_pure_alcohol", SqlType.double(), NOT_NULLABLE),
                TableDefinition.Column("continent", SqlType.text(), NOT_NULLABLE)
            ])

        # Create the table
        connection.catalog.create_table(drinks_table)

        path_to_csv = "drinks.csv"
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper',
                                CreateMode.CREATE_AND_REPLACE) as connection:
                    if connection.is_open:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(TableName('Extract', 'Campaign_data'), [
                            # Columns below are extracted from the Report data API
                            TableDefinition.Column('Row_ID', SqlType.big_int()),
                            TableDefinition.Column('Inserted Date', SqlType.date()),
                            TableDefinition.Column('Date', SqlType.date()),
                            TableDefinition.Column('Account Id', SqlType.varchar(50)),
                            TableDefinition.Column('Account Name', SqlType.text()),
                            TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                            TableDefinition.Column('Campaign Name', SqlType.text()),
                            TableDefinition.Column('Impressions', SqlType.big_int()),
                            TableDefinition.Column('Clicks', SqlType.big_int()),
                            TableDefinition.Column('Reach', SqlType.big_int()),
                            TableDefinition.Column('Spend', SqlType.double()),
                            TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully defined the Facebook Campaign table")
                        connection.catalog.create_table(DataTable)

                        with Inserter(connection, TableName('Extract', 'Campaign_data')) as inserter:
                            inserted_rows = 1
                            row_id = 1
                            for i in range(0, len(result["Campaign Id"])):
                                inserter.add_row([
                                    int(row_id),
                                    datetime.today(),
                                    datetime.strptime(result.loc[i, "Date"], '%Y-%m-%d'),
                                    str(result.loc[i, "Account Id"]),
                                    str(result.loc[i, "Account Name"]),
                                    str(result.loc[i, "Campaign Id"]),
                                    str(result.loc[i, "Campaign Name"]),
                                    int(result.loc[i, "Impressions"]),
                                    int(result.loc[i, "Clicks"]),
                                    int(result.loc[i, "Reach"]),
                                    float(result.loc[i, "Spend"]),
                                    float(result.loc[i, "Frequency"])
                                ])
                                row_id = row_id + 1
                                inserted_rows = inserted_rows + 1
                            inserter.execute()
                        print("Inserted rows: " + str(inserted_rows))
                        LogFileWrite("Inserted rows: " + str(inserted_rows))

                        # Remove duplicate rows, keeping only the latest Row_ID per group
                        table_name = TableName('Extract', 'Campaign_data')
                        delete_query = f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN ('
                        delete_query += f'SELECT MAX("Row_ID") FROM {table_name} '
                        delete_query += 'GROUP BY "Date", "Campaign Id", "Campaign Name", '
                        delete_query += '"Account Id", "Impressions", '
                        delete_query += '"Clicks", "Account Name", "Reach", "Spend", "Frequency")'
                        connection.execute_command(delete_query)
                        print("Deleted duplicate rows")
                        LogFileWrite("Successfully deleted duplicate rows")
                    else:
                        print("Unable to open connection to Hyper file")
                        LogFileWrite("Unable to open connection to Hyper file")
                if connection.is_open:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                print("Connection is open or closed: " + str(connection.is_open))
            else:
                print("Unable to start the Hyper process")
                LogFileWrite("Unable to start the Hyper process")
        if hyperprocess.is_open:
            hyperprocess.close()
            print("Forcefully shut down the Hyper process")
            LogFileWrite("Forcefully shut down the Hyper process")
        else:
            print("Hyper process is shut down")
            LogFileWrite("Hyper process is shut down")
        print("Connection is open or closed: " + str(connection.is_open))
        print("Process is open or closed: " + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is an exception in starting the Tableau Hyper process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 2 21:35:47 2020

@author: Manish Chauhan || +91-9774083186
"""
from tableauhyperapi import HyperProcess, Inserter, Telemetry, Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, escape_name, escape_string_literal, \
    HyperException, TableName

# Starting local hyper instance

extract_table = TableDefinition(
    table_name=TableName('extract', 'extract'),
    columns=[
        TableDefinition.Column("Sno", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Sale", SqlType.big_int()),
        TableDefinition.Column("Profit", SqlType.big_int()),
        # TableDefinition.Column("Date", SqlType.date())
    ])

data_to_insert = [["1", "Vishakha", 99999, 80]]


def run_create_hyper_file_from_csv():
    print("Inside function to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
path_to_database = Path(hyper_file_path)

# The table is called "Extract" and will be created in the "Extract" schema
# and contains four columns.
extract_table = TableDefinition(
    table_name=table_name,
    columns=[
        TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NULLABLE),
        TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NULLABLE)
    ])


def insert_data():
    """
    Creates a simple .hyper file. For more on this, see the below example:
    https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py
    """
    print("Creating single table for conversion.")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
def sparkConnect():
    # Fetch the DataFrame from the Spark filestore
    if cf.file_type == 'csv':
        df = spark.read.format(cf.file_type) \
            .option("inferSchema", cf.infer_schema) \
            .option("header", cf.first_row_is_header) \
            .option("sep", cf.delimiter) \
            .load(cf.input_file_path)
        # print('\n', cf.input_file_path, '\n', cf.schema, '\n')

    # Fetch the table from a database via JDBC (Databricks)
    elif cf.file_type == 'jdbc':
        df = spark.read.format("jdbc") \
            .option("driver", cf.driver) \
            .option("url", cf.url) \
            .option("dbtable", cf.table) \
            .option("user", cf.user) \
            .option("password", cf.password) \
            .option("inferSchema", cf.infer_schema) \
            .option("header", cf.first_row_is_header) \
            .load()

        df.write.format("csv") \
            .option("encoding", cf.charset) \
            .option("header", cf.first_row_is_header) \
            .option("sep", cf.delimiter) \
            .save('/home/hari/HyperConverter/test')

        # pdf = df.select('*').toPandas()
        # path = '/home/hari/HyperConverter/test.csv'
        # pdf.to_csv(path, sep=',', index=False)

        path = glob.glob('/home/hari/HyperConverter/test/part*.csv')
        cf.input_file_path = path[0]
        cf.input_file_path = path
        print('\n', cf.input_file_path, '\n')

    col = list(df.dtypes)
    print(col)
    print(len(col))
    for i in range(len(col)):
        col[i] = list(col[i])
        col[i][1] = type_[col[i][1]]
    # print('\n', col, '\n')

    x = []
    for i, j in col:
        print(i, j)
        if j == 'varchar':
            max_length = df.agg({i: "max"}).collect()[0]
            # print(max_length)
            xyz = max_length["max({})".format(i)]
            if xyz is not None:
                max_length = len(xyz)
                if 19 <= max_length <= 40:
                    max_length = 100
                else:
                    max_length = 30
            else:
                max_length = 35
            print(max_length)
            x.append(TableDefinition.Column(i, SqlType.varchar(max_length), NULLABLE))
        elif j == 'int':
            x.append(TableDefinition.Column(i, SqlType.int(), NULLABLE))
        elif j == 'date':
            x.append(TableDefinition.Column(i, SqlType.date(), NULLABLE))
        elif j == 'numeric':
            x.append(TableDefinition.Column(i, SqlType.numeric(10, 4), NULLABLE))
        elif j == 'bool':
            x.append(TableDefinition.Column(i, SqlType.bool(), NULLABLE))
        elif j == 'big_int':
            x.append(TableDefinition.Column(i, SqlType.big_int(), NULLABLE))
        elif j == 'double':
            x.append(TableDefinition.Column(i, SqlType.double(), NULLABLE))
        elif j == 'text':
            print("this is the culprit", i, j)
            x.append(TableDefinition.Column(i, SqlType.text(), NULLABLE))
    print(x)
    print(len(x))
    return x
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

data_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="COVID-19",
    columns=[
        TableDefinition.Column("Country/Region", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Province/State", SqlType.text(), NULLABLE),
        TableDefinition.Column("Latitude", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("Longitude", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("Case_Type", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Date", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Cases", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Difference", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Last_Update_Date", SqlType.date(), NOT_NULLABLE),
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")
    path_to_database = Path("covid-19.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

product_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="Products",
    columns=[
        TableDefinition.Column("category", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("title", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("price", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("available_quantity", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("sold_quantity", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("permalink", SqlType.text(), NOT_NULLABLE)
    ])


def call_mlapi_to_dict(SearchCategory):
    print("Start call_mlapi_to_dict.")
    try:
        query = {'1': SearchCategory, 'limit': '50'}
        response = requests.get(
            'https://api.mercadolibre.com/sites/MLA/search?q=' +
from tableauhyperapi import (
    HyperProcess,
    Telemetry,
    Connection,
    SqlType,
    TableDefinition,
    CreateMode,
    TableName,
    Inserter,
)

# Map pandas dtypes to Tableau Hyper SqlTypes
dtype_mapper = {
    "string": SqlType.text(),
    "str": SqlType.text(),
    "object": SqlType.text(),
    "O": SqlType.text(),
    "int64": SqlType.big_int(),
    "float64": SqlType.double(),
    "bool": SqlType.bool(),
    "datetime64[ns]": SqlType.timestamp(),
    "timedelta64[ns]": SqlType.interval(),
    "category": SqlType.text(),
}


def read_hyper(path_to_hyper_file, custom_schema="Extract"):
    """Read a Tableau Hyper file and turn it into a Pandas DataFrame.

    Currently can only read single table extracts, which is Tableau's
    default way of creating an extract.

    Args:
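The dtype_mapper above can also drive the reverse direction, turning a DataFrame's dtypes into Hyper columns. A minimal sketch, assuming pandas is available; frame_to_columns and the sample frame are illustrative, not part of the original module.

# Illustrative only: frame_to_columns and the sample frame are assumptions,
# not part of the original module.
import pandas as pd

def frame_to_columns(df: pd.DataFrame):
    # Translate each column's pandas dtype into a Hyper column definition
    return [
        TableDefinition.Column(name, dtype_mapper[str(dtype)])
        for name, dtype in df.dtypes.items()
    ]

example = pd.DataFrame({"city": ["Oslo"], "population": [709000], "growth": [1.4]})
columns = frame_to_columns(example)  # text, big_int, double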
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="test02",
    columns=[
        TableDefinition.Column("ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Mathematics", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Science", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("English", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Japanese", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("History", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Geography", SqlType.text(), NOT_NULLABLE),
    ]
)


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")
    path_to_database = Path("test02.hyper")
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name="Customer",
    columns=[
        TableDefinition.Column("Customer ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Customer Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Loyalty Reward Points", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Segment", SqlType.text(), NOT_NULLABLE)
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")
    path_to_database = Path("customer.hyper")

    # Optional process parameters.
    # They are documented in the Tableau Hyper documentation, chapter "Process Settings"
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print("The HyperProcess has started.")
        object_name = "tdxdemo.hyper"
        file_name = os.environ.get('bucket_name')

        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('activityId', SqlType.big_int()),
                TableDefinition.Column('activityType', SqlType.text()),
                TableDefinition.Column('contactId', SqlType.big_int()),
                TableDefinition.Column('industry', SqlType.text()),
                TableDefinition.Column('accountId', SqlType.text()),
                TableDefinition.Column('accountName', SqlType.text()),
                TableDefinition.Column('activityDate', SqlType.text()),
                TableDefinition.Column('company', SqlType.text()),
                TableDefinition.Column('name', SqlType.text()),
                TableDefinition.Column('activitiesTotal', SqlType.big_int()),
                TableDefinition.Column('activitiesPerContact', SqlType.big_int()),
                TableDefinition.Column('contactsTotal', SqlType.big_int()),
                TableDefinition.Column('totalFormsSubmitted', SqlType.big_int()),
                TableDefinition.Column('totalPageViews', SqlType.big_int()),
                TableDefinition.Column('totalWebVisits', SqlType.big_int()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)

            with Inserter(connection, example_table) as inserter:
                for i in request_data:
                    inserter.add_row([
                        i['activityId'],
                        i['activityType'],
                        i['contactId'],
                        i['industry'],
                        i['accountId'],
                        i['accountName'],
                        i['activityDate'],
                        i['company'],
                        i['name'],
                        i['activitiesTotal'],
                        i['activitiesPerContact'],
                        i['contactsTotal'],
                        i['totalFormsSubmitted'],
                        i['totalPageViews'],
                        i['totalWebVisits']
                    ])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")

    with open('tdxdemo.hyper', 'rb') as reader:
        if object_name is None:
            object_name = file_name
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ.get('aws_access_key_id'),
            aws_secret_access_key=os.environ.get('aws_secret_access_key'))
        try:
            response = s3_client.upload_fileobj(
                reader, file_name, object_name,
                ExtraArgs={'ACL': 'public-read'})
        except ClientError as e:
            logging.error(e)
            return False

    return redirect(url_for('index'))
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

# Create the table definition to be used in the Hyper file
workbook_table = TableDefinition(
    table_name="Workbooks",
    columns=[
        TableDefinition.Column("Project Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Content URL", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Created At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Project Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("size", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Updated At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE)
    ])

# Name of the Hyper file that will be generated locally and deployed to Tableau
path_to_database = Path("workbooks.hyper")

# Name of the project where the Hyper file will be deployed.
# This project must exist on your server.
PROJECT = 'HyperTest'


def main():
    # Read in environment variables necessary to make the connection
    with open('env.yaml') as f:
        env = yaml.load(f, Loader=yaml.FullLoader)
            else:
                max_length = 30
        else:
            max_length = 35
        x.append(TableDefinition.Column(i, SqlType.varchar(max_length), NULLABLE))
    elif j == 'int':
        x.append(TableDefinition.Column(i, SqlType.int(), NULLABLE))
    elif j == 'date':
        x.append(TableDefinition.Column(i, SqlType.date(), NULLABLE))
    elif j == 'numeric':
        x.append(TableDefinition.Column(i, SqlType.numeric(10, 4), NULLABLE))
    elif j == 'bool':
        x.append(TableDefinition.Column(i, SqlType.bool(), NULLABLE))
    elif j == 'big_int':
        x.append(TableDefinition.Column(i, SqlType.big_int(), NULLABLE))
    elif j == 'double':
        x.append(TableDefinition.Column(i, SqlType.numeric(10, 4), NULLABLE))
    else:
        x.append(TableDefinition.Column(i, SqlType.text(), NULLABLE))
        print(i, j, max_length)
        print(x[len(x) - 1], len(x))
print(len(x))

table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name,
    # the table will reside in the default "public" namespace.
    table_name=cf.table_name,
    columns=x
)


def run_create_hyper_file_from_csv():
import os
import json
import sys

from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, escape_string_literal

# Look up SqlType values from a dict
sql_type_dict = {}
sql_type_dict['BIG_INT'] = SqlType.big_int()
sql_type_dict['TEXT'] = SqlType.text()
sql_type_dict['DOUBLE'] = SqlType.double()
sql_type_dict['DATE'] = SqlType.date()
sql_type_dict['TIMESTAMP'] = SqlType.timestamp()

# Look up NULLABLE/NOT_NULLABLE from a dict
nullable_dict = {}
nullable_dict['YES'] = NULLABLE
nullable_dict['NO'] = NOT_NULLABLE


def create_column_def(table_def_dict: dict):
    column_def = []
    for key in table_def_dict.keys():
        column_def.append(
            TableDefinition.Column(
                key,
                sql_type_dict[table_def_dict[key]['type']],
                nullable_dict[table_def_dict[key]['nullable']]))
    return column_def
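A short usage sketch for create_column_def; the table_def_dict keys and the target table name below are illustrative, not part of the original module.

# Illustrative usage; the column names in this table_def_dict are assumptions.
table_def_dict = {
    "order_id": {"type": "BIG_INT", "nullable": "NO"},
    "customer": {"type": "TEXT", "nullable": "YES"},
    "order_date": {"type": "DATE", "nullable": "YES"},
    "amount": {"type": "DOUBLE", "nullable": "YES"},
}
columns = create_column_def(table_def_dict)
table_def = TableDefinition("Extract", columns)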
from tableauhyperapi import Connection, HyperProcess, SqlType, TableDefinition, \
    escape_string_literal, escape_name, NOT_NULLABLE, Telemetry, Inserter, CreateMode, \
    TableName, HyperException


def run_create_hyper_file():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        print("The HyperProcess has started.")
        with Connection(hyper.endpoint, 'TrivialExample.hyper', CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('rowID', SqlType.big_int()),
                TableDefinition.Column('value', SqlType.big_int()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            with Inserter(connection, example_table) as inserter:
                for i in range(1, 101):
                    inserter.add_row([i, i])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")


if __name__ == "__main__":
    try:
        run_create_hyper_file()
    except HyperException as ex:
        print(ex)