Example #1
    def test_get_table_def(self):
        data = [
            (1001, 1, "Jane", "Doe", "2000-05-01", 29.0, False),
            (1002, 2, "John", "Doe", "1988-05-03", 33.0, False),
            (2201, 3, "Elonzo", "Smith", "1990-05-03", 21.0, True),
            (None, None, None, None, None, None, None)  # Test Nulls
        ]
        # createOrReplaceTempView returns None, so register the view without binding it
        get_spark_session()\
            .createDataFrame(data, ["id", "dept_id", "first_name", "last_name", "dob", "age", "is_temp"])\
            .createOrReplaceTempView("employees")
        df = get_spark_session().sql(
            "select id, cast(dept_id as short), first_name, "
            "last_name, dob, age, is_temp from employees")
        table_def = get_table_def(df, "Extract", "Extract")

        # Ensure that the Table Name matches
        assert (table_def.table_name.name == Name("Extract"))

        # Ensure that the TableDefinition column names match
        assert (table_def.get_column(0).name == Name("id"))
        assert (table_def.get_column(1).name == Name("dept_id"))
        assert (table_def.get_column(2).name == Name("first_name"))
        assert (table_def.get_column(3).name == Name("last_name"))
        assert (table_def.get_column(4).name == Name("dob"))
        assert (table_def.get_column(5).name == Name("age"))
        assert (table_def.get_column(6).name == Name("is_temp"))

        # Ensure that the column data types were converted correctly
        assert (table_def.get_column(0).type == SqlType.big_int())
        assert (table_def.get_column(1).type == SqlType.small_int())
        assert (table_def.get_column(2).type == SqlType.text())
        assert (table_def.get_column(3).type == SqlType.text())
        assert (table_def.get_column(4).type == SqlType.text())
        assert (table_def.get_column(5).type == SqlType.double())
        assert (table_def.get_column(6).type == SqlType.bool())
Example #2
 def test_insert_data_into_hyper_file(self):
     data = [(1001, "Jane", "Doe"), (1002, "John", "Doe"),
             (2201, "Elonzo", "Smith")]
     name = "output"
     table_def = TableDefinition(
         table_name=TableName("Extract", "Extract"),
         columns=[
             TableDefinition.Column(name=Name("id"),
                                    type=SqlType.big_int(),
                                    nullability=NULLABLE),
             TableDefinition.Column(name=Name("first_name"),
                                    type=SqlType.text(),
                                    nullability=NULLABLE),
             TableDefinition.Column(name=Name("last_name"),
                                    type=SqlType.text(),
                                    nullability=NULLABLE)
         ])
     path = insert_data_into_hyper_file(data, name, table_def)
     print(f'Database Path : {path}')
     tables = TestUtils.get_tables("Extract",
                                   "/tmp/hyperleaup/output/output.hyper")
     assert (len(tables) == 1)
     num_rows = TestUtils.get_row_count(
         "Extract", "Extract", "/tmp/hyperleaup/output/output.hyper")
     assert (num_rows == 3)
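
For context, a minimal sketch of what an insert_data_into_hyper_file helper exercised by this test might look like (the real implementation is not shown here; the /tmp/hyperleaup layout is inferred from the assertions above):

import os
from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode, Inserter

def insert_data_into_hyper_file(data, name, table_def):
    # Assumed output layout, matching the paths the test checks
    hyper_dir = f"/tmp/hyperleaup/{name}"
    os.makedirs(hyper_dir, exist_ok=True)
    path = f"{hyper_dir}/{name}.hyper"
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, path, CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema('Extract')
            connection.catalog.create_table(table_def)
            with Inserter(connection, table_def) as inserter:
                inserter.add_rows(rows=data)
                inserter.execute()
    return path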
Example #3
def convert_struct_field(column: StructField) -> TableDefinition.Column:
    """Converts a Spark StructField to a Tableau Hyper SqlType"""
    if column.dataType == IntegerType():
        sql_type = SqlType.int()
    elif column.dataType == LongType():
        sql_type = SqlType.big_int()
    elif column.dataType == ShortType():
        sql_type = SqlType.small_int()
    elif column.dataType == DoubleType():
        sql_type = SqlType.double()
    elif column.dataType == FloatType():
        sql_type = SqlType.double()
    elif column.dataType == BooleanType():
        sql_type = SqlType.bool()
    elif column.dataType == DateType():
        sql_type = SqlType.date()
    elif column.dataType == TimestampType():
        sql_type = SqlType.timestamp()
    elif column.dataType == StringType():
        sql_type = SqlType.text()
    else:
        # Trap the DecimalType case
        if str(column.dataType).startswith("DecimalType"):
            # Max precision is only up to 18 decimal places in Tableau Hyper API
            precision = column.dataType.precision if column.dataType.precision <= 18 else 18
            scale = column.dataType.scale
            sql_type = SqlType.numeric(precision, scale)
        else:
            raise ValueError(f'Invalid StructField datatype for column `{column.name}` : {column.dataType}')
    nullable = NULLABLE if column.nullable else NOT_NULLABLE
    return TableDefinition.Column(name=column.name, type=sql_type, nullability=nullable)
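
Building on convert_struct_field, a plausible get_table_def (the helper exercised in Example #1) simply maps every field of the DataFrame schema; this is a sketch consistent with that test, not the project's actual code:

from tableauhyperapi import TableDefinition, TableName

def get_table_def(df, schema_name="Extract", table_name="Extract"):
    # One Hyper column per Spark StructField
    return TableDefinition(
        table_name=TableName(schema_name, table_name),
        columns=[convert_struct_field(field) for field in df.schema.fields])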
Example #4
    def __init__(self):
        """
        Handler for conversion of storage types between DSS and Tableau Hyper

        DSS storage types:

        "string","date","geopoint","geometry","array","map","object","double",
        "boolean","float","bigint","int","smallint","tinyint"

        Tableau Hyper storage types:

        TypeTag.BOOL, TypeTag.BIG_INT, TypeTag.SMALL_INT, TypeTag.INT, TypeTag.NUMERIC,
        TypeTag.DOUBLE, TypeTag.OID, TypeTag.BYTES, TypeTag.TEXT, TypeTag.VARCHAR, TypeTag.CHAR,
        TypeTag.JSON, TypeTag.DATE, TypeTag.INTERVAL, TypeTag.TIME, TypeTag.TIMESTAMP,
        TypeTag.TIMESTAMP_TZ, TypeTag.GEOGRAPHY

        """
        handle_null = lambda f: lambda x: None if pd.isna(x) else f(x)

        # Mapping DSS to Tableau Hyper types
        self.mapping_dss_to_hyper = {
            'array': (SqlType.text(), handle_null(str)),
            'bigint': (SqlType.big_int(), handle_null(int)),
            'boolean': (SqlType.bool(), handle_null(bool)),
            'date': (SqlType.timestamp(), handle_null(to_hyper_timestamp)),
            'double': (SqlType.double(), handle_null(float)),
            'float': (SqlType.double(), handle_null(float)),
            'geometry': (SqlType.text(), handle_null(str)),
            'geopoint': (SqlType.geography(), handle_null(to_hyper_geography)),
            'int': (SqlType.int(), handle_null(int)),
            'map': (SqlType.text(), handle_null(str)),
            'object': (SqlType.text(), handle_null(str)),
            'smallint': (SqlType.small_int(), handle_null(int)),
            'string': (SqlType.text(), handle_null(str)),
            'tinyint': (SqlType.small_int(), handle_null(int)),
        }

        # Mapping Tableau Hyper to DSS types
        self.mapping_hyper_to_dss = {
            TypeTag.BIG_INT: ('bigint', handle_null(int)),
            TypeTag.BYTES: ('string', handle_null(str)),
            TypeTag.BOOL: ('boolean', handle_null(bool)),
            TypeTag.CHAR: ('string', handle_null(str)),
            TypeTag.DATE: ('date', handle_null(to_dss_date)),
            TypeTag.DOUBLE: ('double', handle_null(float)),
            TypeTag.GEOGRAPHY: ('geopoint', handle_null(to_dss_geopoint)),
            TypeTag.INT: ('int', handle_null(int)),
            TypeTag.INTERVAL: ('string', handle_null(str)),
            TypeTag.JSON: ('string', handle_null(str)),
            TypeTag.NUMERIC: ('double', handle_null(float)),
            TypeTag.OID: ('string', handle_null(str)),
            TypeTag.SMALL_INT: ('smallint', handle_null(int)),
            TypeTag.TEXT: ('string', handle_null(str)),
            TypeTag.TIME: ('string', handle_null(str)),
            TypeTag.TIMESTAMP: ('date', handle_null(to_dss_timestamp)),
            TypeTag.TIMESTAMP_TZ: ('string', handle_null(str)),
            TypeTag.VARCHAR: ('string', handle_null(str))
        }
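
Each mapping entry pairs a Hyper SqlType with a null-safe converter; a hypothetical use, where handler is an instance of this class:

sql_type, to_hyper = handler.mapping_dss_to_hyper['bigint']
assert sql_type == SqlType.big_int()
to_hyper(None)   # -> None, because pd.isna short-circuits the cast
to_hyper('42')   # -> 42, the int cast applied to a non-null value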
Example #5
def createHyperTable():
    columns = getConfig()
    page_table = TableDefinition(table_name="Page")
    for column in columns:
        if column['DataType'] == "String":
            dt = SqlType.text()
        elif column['DataType'] == "Date":
            dt = SqlType.date()
        else:
            dt = SqlType.big_int()
        page_table.add_column(column['ColumnName'], dt)
    return page_table
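
createHyperTable expects getConfig() to return a list of dicts with ColumnName and DataType keys; a hypothetical config illustrating the shape:

def getConfig():
    # Illustrative only: "String" and "Date" are special-cased above,
    # anything else falls back to SqlType.big_int()
    return [
        {'ColumnName': 'page_name', 'DataType': 'String'},
        {'ColumnName': 'published', 'DataType': 'Date'},
        {'ColumnName': 'views', 'DataType': 'Int'},
    ]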
Example #6
def df_to_extract(df, output_path):
    '''
    Converts a Pandas dataframe to a Tableau Extract.

    Parameters
    ----------
    df (pandas dataframe): Dataframe to turn into a Tableau extract
    output_path (str): Where to create the Tableau extract
    ''' 

    # Replace NaNs with 0.0
    df = df.replace(np.nan, 0.0, regex=True)

    print('Creating Tableau data extract...')
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, output_path, CreateMode.CREATE_AND_REPLACE) as connection:
            
            # Create schema
            connection.catalog.create_schema('Extract')

            # Create list of column definitions, based on the datatypes in pandas dataframe
            dtype_map = {
                'int32': SqlType.int(),
                'int64': SqlType.big_int(),
                'float32': SqlType.double(),
                'float64': SqlType.double(),
                'datetime64[ns]': SqlType.date(),
                'object': SqlType.text() 
            }
            table_def = []

            # Get column headers to loop through them
            df_columns = list(df)

            for col_header in df_columns:
                dtype_str = str(df[col_header].dtype)

                # Use dtype_str to look up the appropriate SqlType from dtype_map and append a new column to the table definition
                table_def.append(TableDefinition.Column(col_header, dtype_map[dtype_str]))
                
            # Define table
            extract_table = TableDefinition(TableName('Extract', 'Extract'), table_def)

            # Create table
            connection.catalog.create_table(extract_table)

            # Insert data
            with Inserter(connection, extract_table) as inserter:
                for idx, row in df.iterrows():
                    inserter.add_row(row)
                
                inserter.execute() 
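
A hypothetical call, assuming a DataFrame whose dtypes all appear in dtype_map:

import pandas as pd
df = pd.DataFrame({'region': ['East', 'West'], 'sales': [100, 250]})
df_to_extract(df, 'sales.hyper')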
Example #7
def fn_convert_to_hyper_types(given_type):
    switcher = {
        'empty': SqlType.text(),
        'bool': SqlType.bool(),
        'int': SqlType.big_int(),
        'float-dot': SqlType.double(),
        'date-YMD': SqlType.date(),
        'date-MDY': SqlType.date(),
        'date-DMY': SqlType.date(),
        'time-24': SqlType.time(),
        'time-12': SqlType.time(),
        'datetime-24-YMD': SqlType.timestamp(),
        'datetime-12-MDY': SqlType.timestamp(),
        'datetime-24-DMY': SqlType.timestamp(),
        'str': SqlType.text()
    }
    # Fall back to text for any unrecognized type tag
    return switcher.get(given_type, SqlType.text())
Example #8
    def _hyper_sql_type(self, source_column):
        """
        Finds the correct Hyper column type for source_column

        source_column (obj): Source column (Instance of google.cloud.bigquery.schema.SchemaField)

        Returns a tableauhyperapi.SqlType Object
        """

        source_column_type = source_column.field_type
        return_sql_type = {
            "BOOL": SqlType.bool(),
            "BYTES": SqlType.bytes(),
            "DATE": SqlType.date(),
            "DATETIME": SqlType.timestamp(),
            "INT64": SqlType.big_int(),
            "INTEGER": SqlType.int(),
            "NUMERIC": SqlType.numeric(18, 9),
            "FLOAT64": SqlType.double(),
            "STRING": SqlType.text(),
            "TIME": SqlType.time(),
            "TIMESTAMP": SqlType.timestamp_tz(),
        }.get(source_column_type)

        if return_sql_type is None:
            error_message = "No Hyper SqlType defined for BigQuery source type: {}".format(
                source_column_type
            )
            logger.error(error_message)
            raise LookupError(error_message)

        logger.debug(
            "Translated source column type {} to Hyper SqlType {}".format(
                source_column_type, return_sql_type
            )
        )
        return return_sql_type
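
A sketch of how _hyper_sql_type would typically be used to build a full table definition from a BigQuery table (the client setup and names are assumptions):

# Inside the same class, given a google.cloud.bigquery Client:
table = bigquery_client.get_table("my-project.my_dataset.my_table")
columns = [TableDefinition.Column(field.name, self._hyper_sql_type(field))
           for field in table.schema]
table_def = TableDefinition(TableName("Extract", "Extract"), columns)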
Example #9
orders_table = TableDefinition(
    # The opening of this snippet was cut off; the lines above are reconstructed
    # from the matching Tableau sample (the "Orders" name is an assumption).
    table_name="Orders",
    columns=[
        TableDefinition.Column(name="Address ID", type=SqlType.small_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Customer ID", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Order Date", type=SqlType.date(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Order ID", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Ship Date", type=SqlType.date(), nullability=NULLABLE),
        TableDefinition.Column(name="Ship Mode", type=SqlType.text(), nullability=NULLABLE)
    ]
)

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="Customer",
    columns=[
        TableDefinition.Column(name="Customer ID", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Customer Name", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Loyalty Reward Points", type=SqlType.big_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Segment", type=SqlType.text(), nullability=NOT_NULLABLE)
    ]
)

products_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="Products",
    columns=[
        TableDefinition.Column(name="Category", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Product ID", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Product Name", type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name="Sub-Category", type=SqlType.text(), nullability=NOT_NULLABLE)
    ]
)
Example #10
# The opening line was cut off; restored for completeness.
with HyperProcess(
        telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:

    # Creates a new Hyper file or
    # replaces the file if it already exists.
    with Connection(endpoint=hyper.endpoint,
                    database="drinks.hyper",
                    create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

        # default schema name is "public"
        # define table
        drinks_table = TableDefinition(
            table_name="drinks",
            columns=[
                TableDefinition.Column("country", SqlType.text(),
                                       NOT_NULLABLE),
                TableDefinition.Column("beer_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("spirit_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("wine_servings", SqlType.big_int(),
                                       NOT_NULLABLE),
                TableDefinition.Column("total_litres_of_pure_alcohol",
                                       SqlType.double(), NOT_NULLABLE),
                TableDefinition.Column("continent", SqlType.text(),
                                       NOT_NULLABLE)
            ])

        # create tables
        connection.catalog.create_table(drinks_table)

        path_to_csv = "drinks.csv"
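        # The snippet is cut off here. In Tableau's standard create-from-CSV
        # sample the next step is a COPY command, roughly (a sketch, not the
        # original code; escape_string_literal comes from tableauhyperapi):
        count = connection.execute_command(
            command=f"COPY {drinks_table.table_name} FROM {escape_string_literal(path_to_csv)} "
                    "WITH (format csv, NULL 'NULL', delimiter ',', header)")
        print(f"The number of rows in table {drinks_table.table_name} is {count}.")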
Example #11
def Full_refresh(result):
    LogFileWrite("Running Full refresh")
    try:
        with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyperprocess:
            print("The HyperProcess has started.")
            LogFileWrite("The HyperProcess has started.")
            print(hyperprocess.is_open)
            if hyperprocess.is_open:
                with Connection(hyperprocess.endpoint, 'Facebook_campaigns.hyper', CreateMode.CREATE_AND_REPLACE,) as connection: 
                    if connection.is_open:
                        print("The connection to the Hyper file is open.")
                        LogFileWrite("The connection to the Hyper file is open.")
                        connection.catalog.create_schema('Extract')
                        DataTable = TableDefinition(TableName('Extract','Campaign_data'),[
                        ############Below Columns are extracted from Report data API
                        TableDefinition.Column('Row_ID', SqlType.big_int()),
                        TableDefinition.Column('Inserted Date', SqlType.date()),
                        TableDefinition.Column('Date', SqlType.date()),
                        TableDefinition.Column('Account Id', SqlType.varchar(50)),
                        TableDefinition.Column('Account Name', SqlType.text()),
                        TableDefinition.Column('Campaign Id', SqlType.varchar(50)),
                        TableDefinition.Column('Campaign Name', SqlType.text()),
                        TableDefinition.Column('Impressions', SqlType.big_int()),
                        TableDefinition.Column('Clicks', SqlType.big_int()),
                        TableDefinition.Column('Reach', SqlType.big_int()),
                        TableDefinition.Column('Spend', SqlType.double()),
                        TableDefinition.Column('Frequency', SqlType.double()),
                        ])
                        print("The table is defined.")
                        LogFileWrite("Successfully Facebook Campaign Table is defined")
                        connection.catalog.create_table(DataTable)
                       # print(Campaign_df["Id"].dtype)
                        #print(range(len(Campaign_df["Id"])))
                        
                        with Inserter(connection, TableName('Extract','Campaign_data')) as inserter:
                            inserted_rows = 0  # incremented once per inserted row
                            row_id=1
                            for i in range(0,len(result["Campaign Id"])):
                                #print(str(result.loc[i,"CampaignId"]))
                                #print(result.loc[i,"Date"])
                                inserter.add_row([
                                int(row_id),
                                datetime.today(),
                                (datetime.strptime(result.loc[i,"Date"], '%Y-%m-%d')),
                                #(datetime.date(result.loc[i,"Date"])),#, "%Y-%m-%d")),
                                str(result.loc[i,"Account Id"]),
                                str(result.loc[i,"Account Name"]),
                                str(result.loc[i,"Campaign Id"]),
                                str(result.loc[i,"Campaign Name"]),
                                int(result.loc[i,"Impressions"]),
                                int(result.loc[i,"Clicks"]),
                                int(result.loc[i,"Reach"]),
                                float(result.loc[i,"Spend"]),
                                float(result.loc[i,"Frequency"])
                                ])
                                #print("instered")
                                row_id=row_id+1
                                inserted_rows=inserted_rows+1
                            inserter.execute()
                            print("Instered Rows are " +str(inserted_rows))
                            LogFileWrite("Instered Rows are " +str(inserted_rows))
                        table_name = TableName('Extract', 'Campaign_data')
                        # Keep only the highest Row_ID per logical key, removing duplicate rows
                        delete_query = f'DELETE FROM {table_name} WHERE "Row_ID" NOT IN (' \
                                       f'SELECT MAX("Row_ID") FROM {table_name} ' \
                                       'GROUP BY "Date", "Campaign Id", "Campaign Name", ' \
                                       '"Account Id", "Impressions", ' \
                                       '"Clicks", "Account Name", "Reach", "Spend", ' \
                                       '"Frequency")'

                        connection.execute_command(delete_query)
                        print("Deleted Duplicate rows")
                        LogFileWrite("Successfully deleted Duplicate rows")
                    else:
                        print("unable to open connection to hyper file")
                        LogFileWrite("unable to open connection to hyper file")
                if connection.is_open:
                    connection.close()
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                else:
                    # The `with` block above has already closed the connection
                    print("Connection to Hyper file closed")
                    LogFileWrite("Connection to Hyper file closed")
                    print("Connection is open: " + str(connection.is_open))
            else:
                print("Unable to start the Hyper process ")
                LogFileWrite("Unable to start the Hyper process ")
        if hyperprocess.is_open:
            hyperprocess.close()
            print("Forcibly shut down the Hyper process")
            LogFileWrite("Forcibly shut down the Hyper process")
        else:
            print("Hyper process has shut down")
            LogFileWrite("Hyper process has shut down")
            print("Connection is open: " + str(connection.is_open))
            print("Process is open: " + str(hyperprocess.is_open))
    except HyperException as ex:
        LogFileWrite("There is exception in starting Tableau Hyper Process. Exiting...")
        LogFileWrite(str(ex))
        connection.close()
        hyperprocess.close()
        SendEmailMessage()
        sys.exit()
Example #12
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  2 21:35:47 2020

@author: Manish Chauhan || +91-9774083186
"""
from tableauhyperapi import HyperProcess, Inserter, Telemetry, Connection, CreateMode, NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, escape_name, escape_string_literal, HyperException, TableName

# Starting local hyper instance

extract_table = TableDefinition(
    table_name=TableName('extract', 'extract'),
    columns=[
        TableDefinition.Column("Sno", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Sale", SqlType.big_int()),
        TableDefinition.Column("Profit", SqlType.big_int()),
        #TableDefinition.Column("Date", SqlType.date())
    ])

data_to_insert = [["1", "Vishakha", 99999, 80]]


def run_create_hyper_file_from_csv():

    print("Inside Fucntion to pick data from CSV into table in new Hyper file")
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists
        with Connection(
                endpoint=hyper.endpoint,
Example #13
path_to_database = Path(hyper_file_path)

# The table is called "Extract" and will be created in the "Extract" schema
# and contains four columns.
extract_table = TableDefinition(
    table_name=table_name,
    columns=[
        TableDefinition.Column(name='Customer ID',
                               type=SqlType.text(),
                               nullability=NULLABLE),
        TableDefinition.Column(name='Customer Name',
                               type=SqlType.text(),
                               nullability=NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points',
                               type=SqlType.big_int(),
                               nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment',
                               type=SqlType.text(),
                               nullability=NULLABLE)
    ])


def insert_data():
    """
    Creates a simple .hyper file. For more on this, see the below example:
    https://github.com/tableau/hyper-api-samples/blob/main/Tableau-Supported/Python/insert_data_into_single_table.py
    """
    print("Creating single table for conversion.")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
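    # The body is cut off here; a sketch following the linked sample
    # (the inserted row values are illustrative, not from the original):
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)
            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=[["DK-13375", "Dennis Kane", 685, "Consumer"]])
                inserter.execute()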
Example #14
def sparkConnect():
    # fetching DF from spark filestore
    if cf.file_type == 'csv':
        df = spark.read.format(cf.file_type) \
            .option("inferSchema", cf.infer_schema) \
            .option("header", cf.first_row_is_header) \
            .option("sep", cf.delimiter) \
            .load(cf.input_file_path)
        # print('\n', cf.input_file_path, '\n', cf.schema, '\n')

    # fetching table from db from databricks
    elif cf.file_type == 'jdbc':
        df = spark.read.format("jdbc") \
            .option("driver", cf.driver) \
            .option("url", cf.url) \
            .option("dbtable", cf.table) \
            .option("user", cf.user) \
            .option("password", cf.password) \
            .option("inferSchema", cf.infer_schema) \
            .option("header", cf.first_row_is_header) \
            .load()

        df.write.format("csv") \
            .option("enoding", cf.charset) \
            .option("header", cf.first_row_is_header) \
            .option("sep", cf.delimiter) \
            .save('/home/hari/HyperConverter/test')

        # pdf = df.select('*').toPandas()
        # path = '/home/hari/HyperConverter/test.csv'
        # pdf.to_csv(path, sep=',', index=False)

        path = glob.glob('/home/hari/HyperConverter/test/part*.csv')
        # Keep the single generated part file as the input path
        cf.input_file_path = path[0]
        print('\n', cf.input_file_path, '\n')

    col = list(df.dtypes)
    print(col)
    print(len(col))
    for i in range(len(col)):
        col[i] = list(col[i])
        col[i][1] = type_[col[i][1]]
    # print('\n', col, '\n')

    x = []
    for i, j in col:
        print(i, j)
        if j == 'varchar':
            max_length = df.agg({i: "max"}).collect()[0]
            #print(max_length)
            xyz = max_length["max({})".format(i)]

            if xyz is not None:
                max_length = len(xyz)
                if 19 <= max_length <= 40:
                    max_length = 100
                else:
                    max_length = 30
            else:
                max_length = 35
            print(max_length)
            x.append(
                TableDefinition.Column(i, SqlType.varchar(max_length),
                                       NULLABLE))
        elif j == 'int':
            x.append(TableDefinition.Column(i, SqlType.int(), NULLABLE))
        elif j == 'date':
            x.append(TableDefinition.Column(i, SqlType.date(), NULLABLE))
        elif j == 'numeric':
            x.append(
                TableDefinition.Column(i, SqlType.numeric(10, 4), NULLABLE))
        elif j == 'bool':
            x.append(TableDefinition.Column(i, SqlType.bool(), NULLABLE))
        elif j == 'big_int':
            x.append(TableDefinition.Column(i, SqlType.big_int(), NULLABLE))
        elif j == 'double':
            x.append(TableDefinition.Column(i, SqlType.double(), NULLABLE))
        elif j == 'text':
            print("this is culprate", i, j)
            x.append(TableDefinition.Column(i, SqlType.text(), NULLABLE))
    print(x)
    print(len(x))
    return x
Example #15
# The first lines of the import were cut off; restored for completeness.
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

data_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="COVID-19",
    columns=[
        TableDefinition.Column("Country/Region", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Province/State", SqlType.text(), NULLABLE),
        TableDefinition.Column("Latitude", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("Longitude", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("Case_Type", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Date", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Cases", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Difference", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Last_Update_Date", SqlType.date(),
                               NOT_NULLABLE),
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")

    path_to_database = Path("covid-19.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
Example #16
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

product_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="Products",
    columns=[
        TableDefinition.Column("category", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("title", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("price", SqlType.double(), NOT_NULLABLE),
        TableDefinition.Column("available_quantity", SqlType.big_int(),
                               NOT_NULLABLE),
        TableDefinition.Column("sold_quantity", SqlType.big_int(),
                               NOT_NULLABLE),
        TableDefinition.Column("permalink", SqlType.text(), NOT_NULLABLE)
    ])


def call_mlapi_to_dict(SearchCategory):

    print("Start call_mlapi_to_dict.")

    try:
        query = {'1': SearchCategory, 'limit': '50'}
        response = requests.get(
            'https://api.mercadolibre.com/sites/MLA/search?q=' +
Example #17
# The opening of the import was cut off; restored for completeness.
from tableauhyperapi import (
    HyperProcess,
    Telemetry,
    Connection,
    SqlType,
    TableDefinition,
    CreateMode,
    TableName,
    Inserter,
)

dtype_mapper = {
    "string": SqlType.text(),
    "str": SqlType.text(),
    "object": SqlType.text(),
    "O": SqlType.text(),
    "int64": SqlType.big_int(),
    "float64": SqlType.double(),
    "bool": SqlType.bool(),
    "datetime64[ns]": SqlType.timestamp(),
    "timedelta[ns]": SqlType.interval(),
    "category": SqlType.text(),
}
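
A hypothetical way this mapper would be applied when writing a pandas DataFrame out to Hyper:

columns = [
    TableDefinition.Column(name, dtype_mapper[str(dtype)])
    for name, dtype in df.dtypes.items()
]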


def read_hyper(path_to_hyper_file, custom_schema="Extract"):
    """Read a Tableau Hyper file and turn it into a Pandas DataFrame.

    Currently can only read single table extracts, which is Tableau's
    default way of creating an extract.

    Args:
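    """

    # The rest of the function is cut off above; what follows is a minimal
    # sketch consistent with the docstring, not the original implementation
    # (the query and column handling are assumptions; assumes pandas is
    # imported as pd at module level):
    with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        with Connection(hyper.endpoint, path_to_hyper_file) as connection:
            table = TableName(custom_schema, "Extract")
            rows = connection.execute_list_query(f"SELECT * FROM {table}")
            names = [col.name.unescaped
                     for col in connection.catalog.get_table_definition(table).columns]
            return pd.DataFrame(rows, columns=names)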
Example #18
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException


customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="test02",
    columns=[
        TableDefinition.Column("ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Mathematics", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Science", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("English", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Japanese", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("History", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Geography", SqlType.text(), NOT_NULLABLE),
    ]
)


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")

    path_to_database = Path("test02.hyper")
Example #19
from pathlib import Path

from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

customer_table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name="Customer",
    columns=[
        TableDefinition.Column("Customer ID", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Customer Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Loyalty Reward Points", SqlType.big_int(),
                               NOT_NULLABLE),
        TableDefinition.Column("Segment", SqlType.text(), NOT_NULLABLE)
    ])


def run_create_hyper_file_from_csv():
    """
    An example demonstrating loading data from a csv into a new Hyper file
    """
    print("EXAMPLE - Load data from CSV into table in new Hyper file")

    path_to_database = Path("customer.hyper")

    # Optional process parameters.
    # They are documented in the Tableau Hyper documentation, chapter "Process Settings"
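    # A sketch of such parameters, mirroring Tableau's sample (values illustrative):
    process_parameters = {
        # Limits the number of Hyper event log files to two.
        "log_file_max_count": "2",
        # Limits the size of Hyper event log files to 100 megabytes.
        "log_file_size_limit": "100M",
    }

    # Starts the Hyper Process with the parameters and telemetry enabled.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU,
                      parameters=process_parameters) as hyper:
        ...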
Example #20
def create():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        request_data = request.get_json()
        print("The HyperProcess has started.")
        object_name = "tdxdemo.hyper"
        file_name = os.environ.get('bucket_name')  # S3 bucket name, despite the variable name
        path_to_database = object_name  # local path for the generated Hyper file

        with Connection(
                endpoint=hyper.endpoint,
                database=path_to_database,
                create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('activityId', SqlType.big_int()),
                TableDefinition.Column('activityType', SqlType.text()),
                TableDefinition.Column('contactId', SqlType.big_int()),
                TableDefinition.Column('industry', SqlType.text()),
                TableDefinition.Column('accountId', SqlType.text()),
                TableDefinition.Column('accountName', SqlType.text()),
                TableDefinition.Column('activityDate', SqlType.text()),
                TableDefinition.Column('company', SqlType.text()),
                TableDefinition.Column('name', SqlType.text()),
                TableDefinition.Column('activitiesTotal', SqlType.big_int()),
                TableDefinition.Column('activitiesPerContact',
                                       SqlType.big_int()),
                TableDefinition.Column('contactsTotal', SqlType.big_int()),
                TableDefinition.Column('totalFormsSubmitted',
                                       SqlType.big_int()),
                TableDefinition.Column('totalPageViews', SqlType.big_int()),
                TableDefinition.Column('totalWebVisits', SqlType.big_int()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            with Inserter(connection, example_table) as inserter:
                for i in request_data:
                    inserter.add_row([
                        i['activityId'], i['activityType'], i['contactId'],
                        i['industry'], i['accountId'], i['accountName'],
                        i['activityDate'], i['company'], i['name'],
                        i['activitiesTotal'], i['activitiesPerContact'],
                        i['contactsTotal'], i['totalFormsSubmitted'],
                        i['totalPageViews'], i['totalWebVisits']
                    ])

                inserter.execute()
                print("The data was added to the table.")

            print("The connection to the Hyper extract file is closed.")
        print("The HyperProcess has shut down.")

        with open('tdxdemo.hyper', 'rb') as reader:
            if object_name is None:
                object_name = file_name
            s3_client = boto3.client(
                's3',
                aws_access_key_id=os.environ.get('aws_access_key_id'),
                aws_secret_access_key=os.environ.get('aws_secret_access_key'))
            try:
                response = s3_client.upload_fileobj(
                    reader,
                    file_name,
                    object_name,
                    ExtraArgs={'ACL': 'public-read'})
            except ClientError as e:
                logging.error(e)
                return False

    return redirect(url_for('index'))
Example #21
# The first line of the import was cut off; restored for completeness.
from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    HyperException

#Create table definition to be used in Hyper file
workbook_table = TableDefinition(
    table_name="Workbooks",
    columns=[
        TableDefinition.Column("Project Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Content URL", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Created At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Id", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("Project Name", SqlType.text(), NOT_NULLABLE),
        TableDefinition.Column("size", SqlType.big_int(), NOT_NULLABLE),
        TableDefinition.Column("Updated At", SqlType.date(), NOT_NULLABLE),
        TableDefinition.Column("Name", SqlType.text(), NOT_NULLABLE)
    ])
#Name of hyper file that will be generated locally and deployed to Tableau
path_to_database = Path("workbooks.hyper")
#Name of project where Hyper file will be deployed
#This project must exist on your server
PROJECT = 'HyperTest'


def main():

    #Read in environment variables necessary to make connection
    with open('env.yaml') as f:
        env = yaml.load(f, Loader=yaml.FullLoader)
Example #22
            else:
                max_length = 30
        else:
            max_length = 35

        x.append(TableDefinition.Column(i, SqlType.varchar(max_length), NULLABLE))
    elif j == 'int':
        x.append(TableDefinition.Column(i, SqlType.int(), NULLABLE))
    elif j == 'date':
        x.append(TableDefinition.Column(i, SqlType.date(), NULLABLE))
    elif j == 'numeric':
        x.append(TableDefinition.Column(i, SqlType.numeric(10, 4), NULLABLE))
    elif j == 'bool':
        x.append(TableDefinition.Column(i, SqlType.bool(), NULLABLE))
    elif j == 'big_int':
        x.append(TableDefinition.Column(i, SqlType.big_int(), NULLABLE))
    elif j == 'double':
        x.append(TableDefinition.Column(i, SqlType.numeric(10, 4), NULLABLE))
    else:
        x.append(TableDefinition.Column(i, SqlType.text(), NULLABLE))
    print(i, j, max_length)
    print(x[len(x)-1], len(x))
print(len(x))

table = TableDefinition(
    # Since the table name is not prefixed with an explicit schema name, the table will reside in the default "public" namespace.
    table_name=cf.table_name,
    columns=x
)

def run_create_hyper_file_from_csv():
Example #23
import os
import json
import sys

from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode, NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, escape_string_literal

# Look up SqlType values via a dict
sql_type_dict = {}
sql_type_dict['BIG_INT'] = SqlType.big_int()
sql_type_dict['TEXT'] = SqlType.text()
sql_type_dict['DOUBLE'] = SqlType.double()
sql_type_dict['DATE'] = SqlType.date()
sql_type_dict['TIMESTAMP'] = SqlType.timestamp()
# Look up NULLABLE values via a dict
nullable_dict = {}
nullable_dict['YES'] = NULLABLE
nullable_dict['NO'] = NOT_NULLABLE


def create_column_def(table_def_dict: dict):

    column_def = []
    for key in table_def_dict.keys():
        column_def.append(
            TableDefinition.Column(
                key, sql_type_dict[table_def_dict[key]['type']],
                nullable_dict[table_def_dict[key]['nullable']]))

    return column_def
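
The expected shape of table_def_dict, inferred from the lookups above (field names are illustrative):

table_def_dict = {
    "id":      {"type": "BIG_INT",   "nullable": "NO"},
    "name":    {"type": "TEXT",      "nullable": "YES"},
    "updated": {"type": "TIMESTAMP", "nullable": "YES"},
}
column_def = create_column_def(table_def_dict)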

Example #24
from tableauhyperapi import Connection, HyperProcess, SqlType, TableDefinition, \
    escape_string_literal, escape_name, NOT_NULLABLE, Telemetry, Inserter, CreateMode, \
    TableName, HyperException

def run_create_hyper_file():
    with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        print("The HyperProcess has started.")

        with Connection(hyper.endpoint, 'TrivialExample.hyper', CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')
            example_table = TableDefinition(TableName('Extract', 'Extract'), [
                TableDefinition.Column('rowID', SqlType.big_int()),
                TableDefinition.Column('value', SqlType.big_int()),
            ])
            print("The table is defined.")
            connection.catalog.create_table(example_table)
            with Inserter(connection, example_table) as inserter:
                for i in range(1, 101):
                    inserter.add_row([i, i])
                inserter.execute()
            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")


if __name__ == "__main__":
    try:
        run_create_hyper_file()
    except HyperException as ex:
        print(ex)