def create_schema(self, schema_dss, destination_file_path):
    """
    Create the target Tableau Hyper table(s) from a DSS dataset schema.

    Opens the Hyper process and a connection to the destination file, creates
    the output schema/table, and — when the dataset contains a geo column —
    an additional temporary staging table with geo columns stored as text.
    All handles are stored on self (hyper, connection, table definitions).

    :param schema_dss: DSS schema from the DSS dataset to export
        example: [{"columns": [{"name": "customer_id", "type": "bigint"}, ...]}, ...]
    :param destination_file_path: path of the Tableau Hyper file to write
    :return: None
    :raises ValueError: if no valid schema or table name was configured
    """
    self.output_file = destination_file_path
    logger.info("Writing the Tableau Hyper file to the following location: {}".format(
        destination_file_path))
    logger.info("The dataset to export has the following schema: {}".format(schema_dss))

    dss_columns = schema_dss['columns']
    dss_storage_types = [
        column_descriptor['type'] for column_descriptor in dss_columns
    ]
    self.schema_converter.set_dss_storage_types(dss_storage_types)

    # Geo datasets need a staging table (created below) before conversion.
    self.is_geo_table = dss_is_geo(schema_dss)
    logger.info("The input dataset contains a geo column: {}".format(self.is_geo_table))

    if not self.schema_name or not self.table_name:
        logger.warning("Did not receive the table or schema name.")
        raise ValueError("No valid schema or table name received.")

    logger.info("Received target schema {} and table {}".format(
        self.schema_name, self.table_name))

    # Create the Tableau Hyper schema from the DSS schema
    self.output_table_definition = TableDefinition(
        TableName(self.schema_name, self.table_name),
        self.schema_converter.dss_columns_to_hyper_columns(dss_columns))

    # Open the Hyper process and a connection to the destination file
    self.hyper = HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU)
    self.connection = Connection(self.hyper.endpoint, self.output_file,
                                 CreateMode.CREATE_AND_REPLACE)
    assert self.connection is not None
    self.connection.catalog.create_schema(self.schema_name)
    self.connection.catalog.create_table(self.output_table_definition)

    # Geo case: stage rows in a "tmp_"-prefixed table whose geo columns are
    # text, to be converted to real geography values later.
    if self.is_geo_table:
        logger.info("Detected geo column. Creating a temporary table...")
        dss_tmp_schema = geo_to_text(schema_dss)
        dss_tmp_columns = dss_tmp_schema['columns']
        self.tmp_table_definition = TableDefinition(
            TableName(self.schema_name, "tmp_" + self.table_name),
            self.schema_converter.dss_columns_to_hyper_columns(dss_tmp_columns))
        self.connection.catalog.create_table(self.tmp_table_definition)
        logger.info("Created temporary table")
def importSchema(schema, extractFilename):
    """
    Open (or create) a Tableau extract file and ensure it contains the table
    described by the schema json.

    schema: dict with 'name' (table name, STRING), 'columns' ([OBJECT] with
        'name', optional 'type' and 'collation') and an optional table-level
        'collation'.
    extractFilename: path of the extract file.

    Returns the open Extract on success, or respond(True, errMsg) on error.
    """
    try:
        extract = Extract(extractFilename)
        if 'name' not in schema:
            errMsg = 'No table name defined in the schema json. Key \'name\' required of type STRING'
            print('[ERROR]', errMsg, '\nExiting now.')
            return respond(True, errMsg)
        if 'columns' not in schema:
            errMsg = 'No columns defined in the schema json. Key \'columns\' required of type [OBJECT]'
            print('[ERROR]', errMsg, '\nExiting now.')
            return respond(True, errMsg)
        if not extract.hasTable(schema['name']):
            # Build the table definition in a separate variable — the original
            # reassigned the `schema` parameter, breaking every later access
            # to the schema json.
            tableDef = TableDefinition()
            # Default collation: the one requested in the json when valid,
            # otherwise BINARY.
            if 'collation' in schema and schema['collation'] in collationObj:
                tableDef.setDefaultCollation(collationObj[schema['collation']])
            else:
                tableDef.setDefaultCollation(collationObj['BINARY'])
            for c in schema['columns']:
                if 'name' not in c:
                    errMsg = 'No column name defined in the schema json. Key \'name\' required of type STRING'
                    print('[ERROR]', errMsg, '\nExiting now.')
                    return respond(True, errMsg)
                # Unknown/missing types fall back to UNICODE_STRING.
                colType = Type.UNICODE_STRING
                if 'type' in c and c['type'] in typeObj:
                    colType = typeObj[c['type']]
                if 'collation' in c and c['collation'] in collationObj:
                    tableDef.addColumnWithCollation(c['name'], colType, c['collation'])
                else:
                    tableDef.addColumn(c['name'], colType)
            table = extract.addTable(schema['name'], tableDef)
            if table is None:
                errMsg = 'A fatal error occurred while creating the table'
                print('[ERROR]', errMsg, '\nExiting now.')
                return respond(True, errMsg)
    except TableauException as e:
        errMsg = 'A fatal error occurred while reading the schema definition'
        print('[ERROR]', errMsg, ':\n', e, '\nExiting now.')
        return respond(True, errMsg)
    return extract
def to_hyper(df, hyper_file_name, custom_schema="Extract", custom_table_name="Extract"):
    """
    Write a Tableau Hyper file from a Pandas DataFrame.

    Currently can only write single table extracts, which is Tableau's
    default way of creating an extract.

    Args:
        df: DataFrame to output
        hyper_file_name: File name such as "Example.hyper"
        custom_schema: Schema name inside the Hyper file. Defaults to "Extract"
        custom_table_name: Table name inside the Hyper file. Defaults to "Extract"

    Returns:
        Tableau Hyper file
    """
    # Starts the Hyper Process
    with HyperProcess(
        telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU,
        parameters={"log_config": ""},
    ) as hyper:
        # Creates a .hyper file to put the data into
        with Connection(hyper.endpoint, hyper_file_name,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(custom_schema)

            # Build one Hyper column per DataFrame column, typed by mapping
            # the pandas dtype through dtype_mapper.
            hyper_table = TableDefinition(
                TableName(custom_schema, custom_table_name),
                [
                    TableDefinition.Column(name, dtype_mapper[str(dtype)])
                    for name, dtype in zip(df.columns, df.dtypes)
                ],
            )
            connection.catalog.create_table(hyper_table)

            # Replace NaN with None so it becomes NULL in Tableau.
            # Work on a copy so the caller's DataFrame is not mutated
            # (the original used inplace=True, a surprising side effect).
            df = df.replace({np.nan: None})

            # Insert the data values into the hyper file
            data_to_insert = df.to_numpy()
            with Inserter(connection, hyper_table) as inserter:
                inserter.add_rows(tqdm(row for row in data_to_insert))
                inserter.execute()
def createTabTable(tableName, columnHeading, dataTypes):
    """
    Build a TableDefinition named tableName in the "Extract" schema with one
    column per heading, typed via the mapper lookup.

    Unlike the original, this does not mutate the caller's dataTypes list
    (the old code consumed it with pop(0), an O(n^2) destructive loop).
    """
    extract_table = TableDefinition(table_name=TableName("Extract", tableName))
    # NOTE(review): mapper values are evaluated as code via eval(); this is
    # only safe while mapper is a trusted, hard-coded lookup table — never
    # feed it untrusted input.
    for heading, data_type in zip(columnHeading, dataTypes):
        extract_table.add_column(heading, eval(mapper[data_type]))
    return extract_table
def test_insert_data_into_hyper_file(self):
    """Round-trip check: insert three rows, then verify table and row counts."""
    rows = [
        (1001, "Jane", "Doe"),
        (1002, "John", "Doe"),
        (2201, "Elonzo", "Smith"),
    ]
    name = "output"
    # Standard single-table extract layout: "Extract"."Extract".
    table_def = TableDefinition(
        table_name=TableName("Extract", "Extract"),
        columns=[
            TableDefinition.Column(name=Name("id"),
                                   type=SqlType.big_int(),
                                   nullability=NULLABLE),
            TableDefinition.Column(name=Name("first_name"),
                                   type=SqlType.text(),
                                   nullability=NULLABLE),
            TableDefinition.Column(name=Name("last_name"),
                                   type=SqlType.text(),
                                   nullability=NULLABLE),
        ])
    path = insert_data_into_hyper_file(rows, name, table_def)
    print(f'Database Path : {path}')
    hyper_path = "/tmp/hyperleaup/output/output.hyper"
    assert (len(TestUtils.get_tables("Extract", hyper_path)) == 1)
    assert (TestUtils.get_row_count("Extract", "Extract", hyper_path) == 3)
def __init__(self):
    """Initialize output locations and the search-result table definition."""
    self.location = "./output/meli_challenge_result.hyper"
    self.test_location = "../output/meli_challenge_result.hyper"
    # (name, SQL type, nullability) spec for every field of a search result;
    # the TableDefinition below is generated from this table.
    required = Nullability.NOT_NULLABLE
    optional = Nullability.NULLABLE
    column_specs = [
        ('id', SqlType.text(), required),
        ('site_id', SqlType.text(), required),
        ('title', SqlType.text(), required),
        ('seller', SqlType.text(), required),
        ('price', SqlType.text(), required),
        ('prices', SqlType.json(), required),
        ('sale_price', SqlType.text(), optional),
        ('currency_id', SqlType.text(), optional),
        ('available_quantity', SqlType.int(), optional),
        ('sold_quantity', SqlType.int(), optional),
        ('buying_mode', SqlType.text(), optional),
        ('listing_type_id', SqlType.text(), optional),
        ('stop_time', SqlType.text(), optional),
        ('condition', SqlType.text(), optional),
        ('permalink', SqlType.text(), optional),
        ('thumbnail', SqlType.text(), optional),
        ('accepts_mercadopago', SqlType.bool(), optional),
        ('installments', SqlType.text(), optional),
        ('address', SqlType.text(), optional),
        ('shipping', SqlType.text(), optional),
        ('seller_address', SqlType.text(), optional),
        ('attributes', SqlType.text(), required),
        ('original_price', SqlType.text(), optional),
        ('category_id', SqlType.text(), required),
        ('official_store_id', SqlType.int(), optional),
        ('domain_id', SqlType.text(), optional),
        ('catalog_product_id', SqlType.text(), optional),
        ('tags', SqlType.text(), optional),
        ('catalog_listing', SqlType.bool(), optional),
        ('order_backend', SqlType.int(), optional),
    ]
    self.searchResult_table = TableDefinition('results', [
        TableDefinition.Column(col_name, col_type, col_nullability)
        for col_name, col_type, col_nullability in column_specs
    ])
def importTextSchema(schemaJson):
    """
    Build a temporary landing TableDefinition in which every column is TEXT.

    schemaJson: dict requiring 'name' (STRING) and 'columns' ([OBJECT], each
        with a 'name' key).
    Exits the process on a malformed schema or a Hyper error.
    """
    try:
        if 'name' not in schemaJson:
            print('[ERROR] No table name defined in the schema json. Key \'name\' required of type STRING:\nExiting now\n.')
            exit(-1)
        if 'columns' not in schemaJson:
            print('[ERROR] No columns defined in the schema json. Key \'columns\' required of type [OBJECT]:\nExiting now\n.')
            exit(-1)
        print('[INFO] Creating temp landing table.')
        table_def = TableDefinition('temptable')
        # Landing columns are always TEXT; hoist the constant lookup out of
        # the loop (it never varies per column).
        colFunc = typeObj['TEXT']
        for c in schemaJson['columns']:
            table_def.add_column(c['name'], colFunc())
        # (The old `table_def == None` check was dead code: TableDefinition()
        # either returns an object or raises — it is never None.)
    except HyperException as e:
        print('[ERROR] A fatal error occurred while reading the schema definition:\n', e, '\nExiting now.')
        exit(-1)
    return table_def
def get_table_def(df: DataFrame, schema_name: str, table_name: str) -> TableDefinition:
    """Return a Tableau TableDefinition given a Spark DataFrame.

    Fix: the schema_name/table_name parameters were previously ignored and
    the table was always created as "Extract"."Extract"; they are now used
    (matching the sibling overload that defaults both to 'Extract').
    """
    cols = list(map(convert_struct_field, df.schema))
    return TableDefinition(table_name=TableName(schema_name, table_name), columns=cols)
def get_table_def(df: DataFrame, schema_name: str = 'Extract', table_name: str = 'Extract') -> TableDefinition:
    """Return a Tableau TableDefinition built from a Spark DataFrame's schema."""
    columns = [HyperUtils.convert_struct_field(field) for field in df.schema]
    return TableDefinition(table_name=TableName(schema_name, table_name), columns=columns)
def importSchema(schemaJson):
    """
    Build a Hyper TableDefinition from a schema json.

    schemaJson: dict with 'name' (STRING) and 'columns' ([OBJECT]); each
        column needs 'name', and optionally 'type', 'length' (CHAR/VARCHAR),
        'precision'/'scale' (NUMERIC) and 'collation'.
    Exits the process on a malformed schema or a Hyper error.
    """
    try:
        if 'name' not in schemaJson:
            print('[ERROR] No table name defined in the schema json. Key \'name\' required of type STRING:\nExiting now\n.')
            exit(-1)
        if 'columns' not in schemaJson:
            print('[ERROR] No columns defined in the schema json. Key \'columns\' required of type [OBJECT]:\nExiting now\n.')
            exit(-1)
        table_def = TableDefinition(schemaJson['name'])
        for c in schemaJson['columns']:
            if 'name' not in c:
                print('No column name defined in the schema json. Key \'name\' required of type STRING:\nExiting now\n.')
                exit(-1)
            colType = _resolve_column_type(c)
            if 'collation' in c and c['collation'] in collationObj:
                table_def.add_column(c['name'], colType, collationObj[c['collation']])
            else:
                table_def.add_column(c['name'], colType)
        # (The old `table_def == None` check was dead code: TableDefinition()
        # either returns an object or raises — it is never None.)
    except HyperException as e:
        print('[ERROR] A fatal error occurred while reading the schema definition:\n', e, '\nExiting now.')
        exit(-1)
    return table_def


def _resolve_column_type(c):
    """Resolve the Hyper type for one column spec, validating required size keys.

    Unknown/missing types fall back to TEXT. CHAR and VARCHAR (which the
    original handled in two duplicated branches) both require 'length';
    NUMERIC requires 'precision' and 'scale'. Exits the process on a
    missing required key.
    """
    if 'type' not in c or c['type'] not in typeObj:
        return typeObj['TEXT']()
    colFunc = typeObj[c['type']]
    if c['type'] in ('CHAR', 'VARCHAR'):
        if 'length' not in c:
            print('No length defined for', c['type'], 'column', c['name'], '\nKey \'length\' required of type INTEGER:\nExiting now\n.')
            exit(-1)
        return colFunc(c['length'])
    if c['type'] == 'NUMERIC':
        if 'precision' not in c or 'scale' not in c:
            print('No precision and/or scale defined for NUMERIC column', c['name'], '\nKeys \'precision\' and \'scale\' required of type INTEGER:\nExiting now\n.')
            exit(-1)
        return colFunc(c['precision'], c['scale'])
    return colFunc()
def fn_create_hyper_table(self, local_logger, timer, in_dict):
    """Create a Hyper table from the given schema/table/columns, log and time it."""
    timer.start()
    table_definition = TableDefinition(
        TableName(in_dict['schema name'], in_dict['table name']),
        columns=in_dict['columns'],
    )
    in_dict['connection'].catalog.create_table(table_definition=table_definition)
    # Localized message with the table name spliced into the placeholder.
    message = self.locale.gettext('Hyper table "{hyper_table_name}" has been created')
    local_logger.info(message.replace('{hyper_table_name}', in_dict['table name']))
    timer.stop()
    return table_definition
def createHyperTable():
    """Build the "Page" TableDefinition from the configured column list.

    Column types come from each config entry's 'DataType': "String" -> text,
    "Date" -> date, anything else -> big_int (preserving the original
    fallback behavior).
    """
    columns = getConfig()
    page_table = TableDefinition(table_name="Page")
    for column in columns:
        # Mapping replaces the old if/elif chain and index-based loop.
        data_type = column['DataType']
        if data_type == "String":
            dt = SqlType.text()
        elif data_type == "Date":
            dt = SqlType.date()
        else:
            dt = SqlType.big_int()
        page_table.add_column(column['ColumnName'], dt)
    return page_table
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    '''
    # Replace nan's with 0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            # Create schema
            connection.catalog.create_schema('Extract')

            # Map pandas dtypes to the matching Hyper SQL types.
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text(),
            }

            # One column definition per dataframe column, typed via dtype_map.
            table_def = [
                TableDefinition.Column(header, dtype_map[str(df[header].dtype)])
                for header in list(df)
            ]

            # Define and create the table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)
            connection.catalog.create_table(extract_table)

            # Insert the data row by row
            with Inserter(connection, extract_table) as inserter:
                for _, row in df.iterrows():
                    inserter.add_row(row)
                inserter.execute()
def create():
    """
    Flask route handler: build a Hyper extract from posted JSON meal data,
    upload it to S3, then redirect to the index page.

    Expects request JSON of the form
    {"data": [{"breakfast": ..., "lunch": ..., "dinner": ...}, ...]}.
    Returns False on an S3 upload error.
    """
    # NOTE(review): telemetry is enabled here (SEND_USAGE_DATA_TO_TABLEAU).
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print(request_data)
        print("The HyperProcess has started.")
        object_name = "mealprep.hyper"
        # NOTE(review): file_name actually holds the S3 *bucket* name from the
        # environment while object_name is the S3 key — the variable names are
        # misleading; verify against the upload_fileobj call below.
        file_name = os.environ.get('bucket_name')
        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            # Three text columns, one per meal.
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('Breakfast', SqlType.text()),
                TableDefinition.Column('Lunch', SqlType.text()),
                TableDefinition.Column('Dinner', SqlType.text()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            print(example_table)
            print(type(example_table))
            # One inserted row per posted meal entry.
            with Inserter(connection, example_table) as inserter:
                for i in request_data['data']:
                    inserter.add_row([i['breakfast'], i['lunch'], i['dinner']])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")
    # Upload the finished extract to S3.
    with open('mealprep.hyper', 'rb') as reader:
        # NOTE(review): object_name is always "mealprep.hyper" at this point,
        # so this None-check never fires — presumably leftover from a sample.
        if object_name is None:
            object_name = file_name
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ.get('aws_access_key_id'),
            aws_secret_access_key=os.environ.get('aws_secret_access_key'))
        try:
            response = s3_client.upload_fileobj(reader, file_name, object_name)
        except ClientError as e:
            logging.error(e)
            return False
    return redirect(url_for('index'))
def createHyperFile():
    """Create a Hyper extract of workbook users, one text column per parsed field."""
    parsed = parseData()
    hyper_path = "/Users/jharris/Desktop/workbookUsers.hyper"
    column_names = parsed['cols']
    rows = parsed['data']
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, hyper_path,
                        CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')
            # Every column is plain text.
            columns = [TableDefinition.Column(name, SqlType.text())
                       for name in column_names]
            table = TableDefinition(TableName('Extract', 'Extract'), columns)
            connection.catalog.create_table(table)
            with Inserter(connection, table) as inserter:
                inserter.add_rows(rows=rows)
                inserter.execute()
def _hyper_table_definition(self, source_table, hyper_table_name="Extract"):
    """
    Build a hyper table definition from source_schema

    source_table (obj): Source table (Instance of google.cloud.bigquery.table.Table)
    hyper_table_name (string): Name of the target Hyper table, default="Extract"

    Returns a tableauhyperapi.TableDefinition Object
    """
    logger.debug(
        "Building Hyper TableDefinition for table {}".format(source_table.reference)
    )
    target_cols = []
    for field in source_table.schema:
        col_name = field.name
        col_type = self._hyper_sql_type(field)
        mode = field.mode
        if mode == "REPEATED":
            # Hyper has no equivalent for repeated (array) fields; fail loudly.
            raise HyperSQLTypeMappingError(
                "Field mode REPEATED is not implemented in Hyper"
            )
        if mode == "REQUIRED":
            column = TableDefinition.Column(col_name, col_type, Nullability.NOT_NULLABLE)
        else:
            # Columns are nullable by default.
            column = TableDefinition.Column(name=col_name, type=col_type)
        target_cols.append(column)
        logger.debug("..Column {} - Type {}".format(col_name, col_type))

    return TableDefinition(
        table_name=TableName("Extract", hyper_table_name), columns=target_cols
    )
def create_hyper(dist_path: str, table_def_dict: dict, file_name: str, src_path: str):
    """
    Create a Hyper file at dist_path and bulk-load the CSV at src_path into a
    table named file_name, using the column definitions in table_def_dict.
    """
    # Start a session (Hyper process) for working with Hyper files.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(
                endpoint=hyper.endpoint,
                database=dist_path,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            column_def = create_column_def(table_def_dict)
            # Define the table (the default schema is "public").
            table_def = TableDefinition(table_name=file_name, columns=column_def)
            # Create the table in the catalog from the definition.
            connection.catalog.create_table(table_def)
            # Execute a PostgreSQL-like COPY command to bulk-load the CSV.
            record_count = connection.execute_command(command=f'''
                COPY {table_def.table_name} from {escape_string_literal(src_path)}
                with (format csv, delimiter ',', header)
            ''')
            print(
                f"The number of rows in table {table_def.table_name} is {record_count}."
            )
from pathlib import Path from tableauhyperapi import HyperProcess, Telemetry, \ Connection, CreateMode, \ NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \ Inserter, \ escape_name, escape_string_literal, \ HyperException product_table = TableDefinition( # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace. table_name="Products", columns=[ TableDefinition.Column("category", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("title", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("price", SqlType.double(), NOT_NULLABLE), TableDefinition.Column("available_quantity", SqlType.big_int(), NOT_NULLABLE), TableDefinition.Column("sold_quantity", SqlType.big_int(), NOT_NULLABLE), TableDefinition.Column("permalink", SqlType.text(), NOT_NULLABLE) ]) def call_mlapi_to_dict(SearchCategory): print("Start call_mlapi_to_dict.") try: query = {'1': SearchCategory, 'limit': '50'} response = requests.get(
def Full_refresh(result):
    """
    Full refresh of the Facebook campaign extract.

    Rebuilds 'Facebook_campaigns.hyper' from scratch: creates the
    Extract.Campaign_data table, inserts every row of the `result` frame
    (assumes pandas-like .loc access with the listed column labels — TODO
    confirm caller), then deletes duplicate rows keeping the max Row_ID per
    logical key. On a HyperException, logs, emails and exits the process.
    """
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open==True:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper',
                                CreateMode.CREATE_AND_REPLACE,) as connection:
                    if connection.is_open==True:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        # Columns extracted from the Report data API.
                        DataTable = TableDefinition(TableName('Extract','Campaign_data'),[
                            TableDefinition.Column('Row_ID', SqlType.big_int()),
                            TableDefinition.Column('Inserted Date', SqlType.date()),
                            TableDefinition.Column('Date', SqlType.date()),
                            TableDefinition.Column('Account Id', SqlType.varchar(50)),
                            TableDefinition.Column('Account Name', SqlType.text()),
                            TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                            TableDefinition.Column('Campaign Name', SqlType.text()),
                            TableDefinition.Column('Impressions', SqlType.big_int()),
                            TableDefinition.Column('Clicks', SqlType.big_int()),
                            TableDefinition.Column('Reach', SqlType.big_int()),
                            TableDefinition.Column('Spend', SqlType.double()),
                            TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully Facebook Campaign Table is defined")
                        connection.catalog.create_table(DataTable)
                        # Insert one row per campaign record; Row_ID is a
                        # synthetic running counter used later for dedup.
                        with Inserter(connection, TableName('Extract','Campaign_data')) as inserter:
                            inserted_rows=1
                            row_id=1
                            for i in range(0,len(result["Campaign Id"])):
                                inserter.add_row([
                                    int(row_id),
                                    datetime.today(),
                                    (datetime.strptime(result.loc[i,"Date"], '%Y-%m-%d')),
                                    str(result.loc[i,"Account Id"]),
                                    str(result.loc[i,"Account Name"]),
                                    str(result.loc[i,"Campaign Id"]),
                                    str(result.loc[i,"Campaign Name"]),
                                    int(result.loc[i,"Impressions"]),
                                    int(result.loc[i,"Clicks"]),
                                    int(result.loc[i,"Reach"]),
                                    float(result.loc[i,"Spend"]),
                                    float(result.loc[i,"Frequency"])
                                ])
                                row_id=row_id+1
                                inserted_rows=inserted_rows+1
                            inserter.execute()
                        # NOTE(review): inserted_rows starts at 1, so the count
                        # printed below is one higher than the rows inserted.
                        print("Instered Rows are " +str(inserted_rows))
                        LogFileWrite("Instered Rows are " +str(inserted_rows))
                        # Deduplicate: keep only the max Row_ID per logical key
                        # (date/campaign/account/metrics group).
                        table_name=TableName('Extract','Campaign_data')
                        Delet_query=f"DELETE FROM {table_name} WHERE " +'"'+ 'Row_ID'+'"'+" NOT IN("
                        Delet_query+="SELECT MAX("+'"'+'Row_ID'+'"'+f") FROM {table_name} "
                        Delet_query+="GROUP BY " +'"'+'Date'+'",'+'"'+'Campaign Id'+'",'+'"'+'Campaign Name'+'",'
                        Delet_query+='"'+'Account Id'+'",'+'"'+'Impressions'+'",'
                        Delet_query+='"'+'Clicks'+'",'+'"'+'Account Name'+'",'+'"'+'Reach'+'",'+'"'+'Spend'+'",'
                        Delet_query+='"'+'Frequency'+'")'
                        connection.execute_command(Delet_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                    if connection.is_open==True:
                        connection.close()
                        print("Connection to Hyper file closed")
                        LogFileWrite("Connection to Hyper file closed")
                    else:
                        print("Connection to Hyper file closed")
                        LogFileWrite("Connection to Hyper file closed")
                        print("Connection is open or closed" + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open==True:
            hyperprocess.close()
            print("Forcefully shutted down the Hyper Process")
            LogFileWrite("Forcefully shutted down the Hyper Process")
        else:
            print("Hyper process is shutted down")
            LogFileWrite("Hyper process is shutted down")
        print("Connection is open or closed" + str(connection.is_open))
        print("process is open or closed" + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        # NOTE(review): if the exception fires before the with-blocks bind
        # `connection`/`hyperprocess`, these two calls raise NameError.
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
# Creates a new Hyper file or # replaces the file if it already exists. with Connection(endpoint=hyper.endpoint, database="drinks.hyper", create_mode=CreateMode.CREATE_AND_REPLACE) as connection: # default schema name is "public" # define table drinks_table = TableDefinition( table_name="drinks", columns=[ TableDefinition.Column("country", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("beer_servings", SqlType.big_int(), NOT_NULLABLE), TableDefinition.Column("spirit_servings", SqlType.big_int(), NOT_NULLABLE), TableDefinition.Column("wine_servings", SqlType.big_int(), NOT_NULLABLE), TableDefinition.Column("total_litres_of_pure_alcohol", SqlType.double(), NOT_NULLABLE), TableDefinition.Column("continent", SqlType.text(), NOT_NULLABLE) ]) # create tables connection.catalog.create_table(drinks_table) path_to_csv = "drinks.csv" print(drinks_table.table_name) count_in_drinks_table = connection.execute_command(
def run(self, args):
    """
    Runs the command.

    Copies every table from the input Hyper file into a freshly created
    output Hyper file, adjusting (or inverting, per args.mode) the vertex
    order of polygons in any geography-typed columns along the way.

    :param args: Arguments from argparse.Namespace (input_file, output_file, mode)
    """
    input_file = Path(args.input_file)
    output_file = Path(args.output_file)

    if args.mode == AdjustVertexOrderMode.INVERT:
        print("Reversing vertex order of polygons in spatial columns")
    else:
        print(
            "Adjusting vertex order of polygons assuming data source with "
            "flat - earth topology in spatial columns")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(
            telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint) as connection:
            catalog = connection.catalog

            # Create output database file, fails if the file already exists
            catalog.create_database(output_file)

            # Attach input and output database files under fixed aliases so
            # both can be addressed from one connection.
            catalog.attach_database(input_file, "input")
            catalog.attach_database(output_file, "output")

            # Iterate over all schemas in the input file
            for schema_name in catalog.get_schema_names("input"):

                # Create the schema in the output file
                catalog.create_schema_if_not_exists(
                    SchemaName("output", schema_name.name))

                # Iterate over all tables in the input schema
                for in_table in catalog.get_table_names(schema_name):
                    table_definition = catalog.get_table_definition(
                        name=in_table)
                    columns = table_definition.columns
                    out_table = TableName("output", schema_name.name,
                                          in_table.name)
                    out_table_definition = TableDefinition(
                        out_table, columns)

                    # Create the table in the output file with the same table definition.
                    # Note that any constraints on the table in the input file
                    # will not be present in the table in the output file
                    catalog.create_table(out_table_definition)

                    spatial_columns = [
                        c.name for c in columns
                        if c.type == SqlType.geography()
                    ]
                    # Wrap geography columns in geo_<mode>_vertex_order(...);
                    # all other columns are copied through unchanged.
                    in_column_modifiers = [
                        f"geo_{args.mode.value}_vertex_order({c.name})"
                        if c.type == SqlType.geography() else f"{c.name}"
                        for c in columns
                    ]

                    if len(spatial_columns) > 0:
                        print(
                            f"Copying table {in_table} with {len(spatial_columns)} "
                            f"spatial columns: {spatial_columns}...")
                    else:
                        print(
                            f"Copying table {in_table} with no spatial columns..."
                        )

                    # Server-side INSERT...SELECT copies the data without
                    # pulling any rows into Python.
                    row_count = connection.execute_command(
                        f"INSERT INTO {out_table} SELECT {','.join(in_column_modifiers)} FROM {in_table}"
                    )
                    print(f" {row_count} rows copied")
from tableauhyperapi import HyperProcess, Telemetry, \ Connection, CreateMode, \ NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \ Inserter, \ escape_name, escape_string_literal, \ TableName, Name, \ HyperException # The table is called "Extract" and will be created in the "Extract" schema. # This has historically been the default table name and schema for extracts created by Tableau extract_table = TableDefinition( table_name=TableName("Extract", "Extract"), columns=[ TableDefinition.Column(name='Name', type=SqlType.text(), nullability=NOT_NULLABLE), TableDefinition.Column(name='Location', type=SqlType.geography(), nullability=NOT_NULLABLE) ]) def run_insert_spatial_data_to_a_hyper_file(): """ An example of how to add spatial data to a Hyper file. """ print("EXAMPLE - Add spatial data to a Hyper file ") path_to_database = Path("spatial_data.hyper") # Starts the Hyper Process with telemetry enabled to send data to Tableau. # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
x.append(workbook.name) y.append(workbook.owner_id) for users in y: z.append(server.users.get_by_id(users)) with open('workbook_info.csv', 'w', newline='') as f: writer = csv.writer(f) writer.writerows(zip(x, y, z)) # HYPER API TO CREATE HYPER FILE Workbook_Info_table = TableDefinition( table_name="Workbook_Info", columns=[ TableDefinition.Column("Workbook Name", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("Workbook LUID", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("Owner Info", SqlType.text(), NOT_NULLABLE) ] ) def run_create_hyper_file_from_csv(): path_to_database = Path("Workbook_Info.hyper") process_parameters = { "log_file_max_count": "2", "log_file_size_limit": "100M" } with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters=process_parameters) as hyper:
# Step 1: Start Hyper instance with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: #step 2: Start hyper process with Connection(endpoint=hyper.endpoint, create_mode=CreateMode.CREATE_AND_REPLACE, database=PATH_TO_HYPER) as connection: print( connection.execute_scalar_query( query="SELECT'securing connections with endpoint!'")) #create table def with Table name empdetails = TableDefinition( table_name=TableName('Extract', 'empdetails'), columns=[ TableDefinition.Column('EMP_ID', SqlType.int()), TableDefinition.Column('EMP_NAME', SqlType.text()), TableDefinition.Column('MONTHY_SALARY', SqlType.int()), TableDefinition.Column('UNI_FLAG', SqlType.int()) ]) #Create the table in the connection catalog connection.catalog.create_schema('Extract') connection.catalog.create_table(empdetails) print('Validating the data...') cmd = f"COPY{empdetails.table_name} from {escape_string_literal(PATH_TO_CSV)}\ with " "(format csv , NULL 'NULL', delimiter ',' , HEADER)" table_count = connection.execute_command(cmd) total_time = int((time.time() - start)) total_time = "{:.2f}".format(total_time) print(f'Loaded all the {table_count} data in {total_time} seconds')
hyper_file_path = "hyper_for_csv.hyper" table_name = TableName("Extract", "Extract") output_name = "output.csv" path_to_database = Path(hyper_file_path) # The table is called "Extract" and will be created in the "Extract" schema # and contains four columns. extract_table = TableDefinition( table_name=table_name, columns=[ TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NULLABLE), TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NULLABLE), TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE), TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NULLABLE) ]) def insert_data(): """ Creates a simple .hyper file. For more on this, see the below example: https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py """ print("Creating single table for conversion.")
# -*- coding: utf-8 -*- """ Created on Thu Jul 2 21:35:47 2020 @author: Manish Chauhan || +91-9774083186 """ from tableauhyperapi import HyperProcess, Inserter, Telemetry, Connection, CreateMode, NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, escape_name, escape_string_literal, HyperException, TableName # Starting local hyper instance extract_table = TableDefinition( table_name=TableName('extract', 'extract'), columns=[ TableDefinition.Column("Sno", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("Sale", SqlType.big_int()), TableDefinition.Column("Profit", SqlType.big_int()), #TableDefinition.Column("Date", SqlType.date()) ]) data_to_insert = [["1", "Vishakha", 99999, 80]] def run_create_hyper_file_from_csv(): print("Inside Fucntion to pick data from CSV into table in new Hyper file") with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper: # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists with Connection( endpoint=hyper.endpoint,
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="test02",
    # All columns are required (NOT_NULLABLE); only "English" is numeric.
    columns=[
        TableDefinition.Column("ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Mathematics", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Science", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("English", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Japanese", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("History", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Geography", SqlType.text(), NOT_NULLABLE),
    ]
)


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")

    # NOTE(review): chunk ends here — the HyperProcess/Connection/COPY logic
    # that consumes path_to_database presumably follows beyond this excerpt.
    path_to_database = Path("test02.hyper")
# # ----------------------------------------------------------------------------- from pathlib import Path from tableauhyperapi import HyperProcess, Telemetry, \ Connection, CreateMode, \ NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \ Inserter, \ escape_name, escape_string_literal, \ HyperException customer_table = TableDefinition( # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace. table_name="Customer", columns=[ TableDefinition.Column("Customer ID", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("Customer Name", SqlType.text(), NOT_NULLABLE), TableDefinition.Column("Loyalty Reward Points", SqlType.big_int(), NOT_NULLABLE), TableDefinition.Column("Segment", SqlType.text(), NOT_NULLABLE) ]) def run_create_hyper_file_from_csv(): """ An example demonstrating loading data from a csv into a new Hyper file """ print("EXAMPLE - Load data from CSV into table in new Hyper file") path_to_database = Path("customer.hyper") # Optional process parameters.
# NOTE(review): this fragment starts mid-script — query_res and
# path_to_database are defined earlier in the original file. query_res is
# presumably a pandas DataFrame (it supports .astype and per-column
# positional indexing) — confirm against the caller.
# Normalize the remaining object columns to str before insertion.
query_res['column_ex4'] = query_res['column_ex4'].astype(str)
query_res['column_ex5'] = query_res['column_ex5'].astype(str)
query_res['column_ex6'] = query_res['column_ex6'].astype(str)

with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with Connection(endpoint=hyper.endpoint, database=path_to_database, create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
        connection.catalog.create_schema('Extract')
        # Define the hyper table (Same definition as the full refresh)
        example_table = TableDefinition(TableName('Extract', 'Extract'), [
            TableDefinition.Column('column_ex', SqlType.varchar(500)),
            TableDefinition.Column('column_ex2', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int', SqlType.int()),
            TableDefinition.Column('column_ex3', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int2', SqlType.int()),
            TableDefinition.Column('column_ex4', SqlType.varchar(500)),
            TableDefinition.Column('column_ex5', SqlType.varchar(500)),
            TableDefinition.Column('column_ex_int3', SqlType.int()),
            TableDefinition.Column('column_ex6', SqlType.varchar(500)),
        ])
        print("The table is defined.")
        connection.catalog.create_table(table_definition=example_table)
        # Insert data from dataframe to hyper table
        with Inserter(connection, example_table) as inserter:
            for i in range(len(query_res)):
                # NOTE(review): row list is truncated here by the excerpt — the
                # remaining columns of the add_row call follow beyond this chunk.
                inserter.add_row(
                    [
                        query_res['column_ex'][i], \
                        query_res['column_ex2'][i], \
                        int(query_res['column_ex_int'][i]), \
                        query_res['column_ex3'][i], \
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, escape_name, escape_string_literal, HyperException
import config as cf
from spark_connect import sparkConnect

# test = sparkConnect()
# schema = test[1]
# print(schema)

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name=cf.table_name,
    # Column definitions are produced by sparkConnect(); presumably a list of
    # TableDefinition.Column built from the Spark schema — confirm in spark_connect.
    columns=sparkConnect())

print('\n', cf.input_file_path, '\n')


def run_create_hyper_file_from_csv():
    """
    Loading data from a csv into a new Hyper file
    """
    print("Load data from CSV into table in new Hyper file")
    # Start the Hyper Process with telemetry disabled (opted out via
    # Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU).
    # NOTE(review): chunk is truncated mid-call — the Connection arguments and
    # the rest of the function body continue beyond this excerpt.
    with HyperProcess(
            telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(
                endpoint=hyper.endpoint,