Example #1
import tableauhyperapi as hyp


def workbook_owners_publish():

    workbook_owners_dict = workbook_owners()

    with hyp.HyperProcess(hyp.Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        print("The HyperProcess has started.")

        with hyp.Connection(hyper.endpoint, '../data/workbook_owners.hyper',
                            hyp.CreateMode.CREATE_AND_REPLACE) as connection:
            print("The connection to the Hyper file is open.")

            connection.catalog.create_schema('Extract')

            table = hyp.TableDefinition(hyp.TableName('Extract', 'Extract'), [
                hyp.TableDefinition.Column('workbook_name',
                                           hyp.SqlType.text()),
                hyp.TableDefinition.Column('owner', hyp.SqlType.text())
            ])

            print("The table is defined.")

            connection.catalog.create_table(table)

            with hyp.Inserter(connection, table) as inserter:
                # workbook_owners() returns an iterable of dicts keyed by
                # 'workbook_name' and 'owner'
                for row in workbook_owners_dict:
                    inserter.add_row([row['workbook_name'], row['owner']])

                inserter.execute()

            print("The data was added to the table.")
        print("The connection to the Hyper extract file is closed.")
    print("The HyperProcess has shut down.")
Example #2
from typing import List

import numpy as np
import pandas as pd
import tableauhyperapi as tab_api

# pantab_types and libwriter are pantab-internal modules: type helpers and
# the compiled writer extension, respectively


def _insert_frame(
    df: pd.DataFrame,
    *,
    connection: tab_api.Connection,
    table: pantab_types.TableType,
    table_mode: str,
) -> None:
    _validate_table_mode(table_mode)

    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Populate insertion mechanisms dependent on column types
    column_types: List[pantab_types._ColumnType] = []
    columns: List[tab_api.TableDefinition.Column] = []
    for col_name, dtype in df.dtypes.items():
        column_type = _pandas_to_tableau_type(dtype.name)
        column_types.append(column_type)
        columns.append(
            tab_api.TableDefinition.Column(
                name=col_name,
                type=column_type.type_,
                nullability=column_type.nullability,
            ))

    # Sanity check for existing table structures
    if table_mode == "a" and connection.catalog.has_table(table):
        table_def = connection.catalog.get_table_definition(table)
        _assert_columns_equal(columns, table_def.columns)
    else:  # New table, potentially new schema
        table_def = tab_api.TableDefinition(table)

        for column, column_type in zip(columns, column_types):
            table_def.add_column(column)

        if isinstance(table, tab_api.TableName) and table.schema_name:
            connection.catalog.create_schema_if_not_exists(table.schema_name)

        connection.catalog.create_table_if_not_exists(table_def)

    null_mask = np.ascontiguousarray(pd.isnull(df))
    # Special handling for conversions
    df, dtypes = _maybe_convert_timedelta(df)

    with tab_api.Inserter(connection, table_def) as inserter:
        # Hack: there is no supported way to expose the memory address of the
        # cdata object at runtime. Its repr looks like
        # <cdata 'hyper_inserter_buffer_t *' 0x7f815192ec60>, so parse out the
        # trailing 0x7f815192ec60. ffi.addressof does not work here because
        # this is an opaque pointer.
        address = int(str(inserter._buffer)[:-1].split()[-1], base=16)
        libwriter.write_to_hyper(
            df.itertuples(index=False, name=None),
            null_mask,
            address,
            df.shape[1],
            dtypes,
        )
        inserter.execute()
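_insert_frame is a private helper; pantab exposes it through the public frame_to_hyper function. A minimal usage sketch, assuming pantab is installed (file name and data are made up):

import pandas as pd
import pantab

df = pd.DataFrame({"workbook_name": ["Sales"], "owner": ["alice"]})
# Writes df to a new .hyper extract; table may be given as a plain string
pantab.frame_to_hyper(df, "example.hyper", table="Extract")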
Example #3
from typing import List

import numpy as np
import pandas as pd
import tableauhyperapi as tab_api

# pantab_types and libwriter are pantab-internal modules: type helpers and
# the compiled writer extension, respectively


def _insert_frame(
    df: pd.DataFrame,
    *,
    connection: tab_api.Connection,
    table: pantab_types.TableType,
    table_mode: str,
) -> None:
    _validate_table_mode(table_mode)

    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Populate insertion mechanisms dependent on column types
    column_types: List[pantab_types._ColumnType] = []
    columns: List[tab_api.TableDefinition.Column] = []
    for col_name, dtype in df.dtypes.items():
        column_type = _pandas_to_tableau_type(dtype.name)
        column_types.append(column_type)
        columns.append(
            tab_api.TableDefinition.Column(
                name=col_name,
                type=column_type.type_,
                nullability=column_type.nullability,
            ))

    # Sanity check for existing table structures
    if table_mode == "a" and connection.catalog.has_table(table):
        table_def = connection.catalog.get_table_definition(table)
        _assert_columns_equal(columns, table_def.columns)
    else:  # New table, potentially new schema
        table_def = tab_api.TableDefinition(table)

        for column, column_type in zip(columns, column_types):
            table_def.add_column(column)

        if isinstance(table, tab_api.TableName) and table.schema_name:
            connection.catalog.create_schema_if_not_exists(table.schema_name)

        connection.catalog.create_table_if_not_exists(table_def)

    null_mask = np.ascontiguousarray(pd.isnull(df))
    # Special handling for conversions
    df, dtypes = _maybe_convert_timedelta(df)

    with tab_api.Inserter(connection, table_def) as inserter:
        libwriter.write_to_hyper(
            df.itertuples(index=False, name=None),
            null_mask,
            inserter._buffer,
            df.shape[1],
            dtypes,
        )
        inserter.execute()
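Examples #2 and #3 are two revisions of the same pantab function: the first parses the buffer address out of the cdata object's repr in Python, while the second passes inserter._buffer to the writer extension directly and leaves pointer handling to the compiled code.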
Example #4
import argparse
import sys
from pathlib import Path
from typing import Dict, List, Optional

import tableauhyperapi

# GraphSet, build_table_defns and build_data are altimeter helpers defined
# elsewhere in the original script


def main(argv: Optional[List[str]] = None) -> int:
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument("input_json_filepaths", type=Path, nargs="+")
    args_ns = parser.parse_args(argv)

    input_json_filepaths = args_ns.input_json_filepaths
    if len(input_json_filepaths) > 1:
        raise NotImplementedError("Only one input supported at this time")

    # create a dict of scan ids to GraphSets. This contains all of the data in the provided input.
    scan_ids_graph_sets: Dict[int, GraphSet] = {
        scan_id: GraphSet.from_json_file(filepath)
        for scan_id, filepath in enumerate(input_json_filepaths)
    }

    # discover tables which need to be created by iterating over resources and finding the maximum
    # set of predicates used for each type
    table_defns = build_table_defns(scan_ids_graph_sets.values())

    # build data
    table_names_datas = build_data(scan_ids_graph_sets.values(), table_defns)

    table_names_tables: Dict[str, tableauhyperapi.TableDefinition] = {}
    with tableauhyperapi.HyperProcess(
        telemetry=tableauhyperapi.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
    ) as hyper:
        with tableauhyperapi.Connection(
                endpoint=hyper.endpoint,
                database="altimeter.hyper",
                create_mode=tableauhyperapi.CreateMode.CREATE_AND_REPLACE,
        ) as connection:
            # create tables
            for table_name, columns in table_defns.items():
                table = tableauhyperapi.TableDefinition(
                    table_name=table_name,
                    columns=[column.to_hyper() for column in columns])
                connection.catalog.create_table(table)
                table_names_tables[table_name] = table

            for table_name, datas in table_names_datas.items():
                with tableauhyperapi.Inserter(
                        connection,
                        table_names_tables[table_name]) as inserter:
                    inserter.add_rows(datas)
                    inserter.execute()

    return 0
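The function is written to serve as a console entry point; a hypothetical invocation with a made-up input file would be:

sys.exit(main(["scan.json"]))  # "scan.json" is a placeholder GraphSet JSON file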
Example #5
# This utility script will help generate those files, which can be
# incorporated into testing

import tableauhyperapi as tab_api

if __name__ == "__main__":

    table = tab_api.TableDefinition(
        table_name=tab_api.TableName("public", "table"),
        columns=[
            tab_api.TableDefinition.Column(
                name="Non-Nullable String",
                type=tab_api.SqlType.text(),
                nullability=tab_api.NOT_NULLABLE,
            )
        ],
    )

    with tab_api.HyperProcess(
        telemetry=tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
    ) as hyper:
        with tab_api.Connection(
                endpoint=hyper.endpoint,
                database="non_pantab_writeable.hyper",
                create_mode=tab_api.CreateMode.CREATE_AND_REPLACE,
        ) as connection:
            connection.catalog.create_table(table_definition=table)

            with tab_api.Inserter(connection, table) as inserter:
                inserter.add_rows([["row1"], ["row2"]])
                inserter.execute()
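To verify the written file, the extract can be read back with a SQL query. A minimal sketch, assuming the non_pantab_writeable.hyper file created above exists in the working directory:

with tab_api.HyperProcess(
        telemetry=tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    with tab_api.Connection(endpoint=hyper.endpoint,
                            database="non_pantab_writeable.hyper") as connection:
        rows = connection.execute_list_query(
            f"SELECT * FROM {tab_api.TableName('public', 'table')}")
        print(rows)  # expected: [['row1'], ['row2']]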