def workbook_owners_publish():
    """Write the workbook ownership records to a Hyper extract file.

    Records are obtained from ``workbook_owners()``; each record is indexed
    by the keys ``'workbook_name'`` and ``'owner'``. The rows are stored in
    the ``Extract.Extract`` table of ``../data/workbook_owners.hyper``, which
    is opened with ``CREATE_AND_REPLACE``. Progress messages are printed at
    every stage.
    """
    owner_records = workbook_owners()

    with hyp.HyperProcess(hyp.Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        print("The HyperProcess has started.")

        with hyp.Connection(
            endpoint=hyper.endpoint,
            database='../data/workbook_owners.hyper',
            create_mode=hyp.CreateMode.CREATE_AND_REPLACE,
        ) as connection:
            print("The connection to the Hyper file is open.")
            connection.catalog.create_schema('Extract')

            # Both columns are plain text.
            name_column = hyp.TableDefinition.Column(
                'workbook_name', hyp.SqlType.text())
            owner_column = hyp.TableDefinition.Column(
                'owner', hyp.SqlType.text())
            extract_table = hyp.TableDefinition(
                hyp.TableName('Extract', 'Extract'),
                [name_column, owner_column],
            )
            print("The table is defined.")
            connection.catalog.create_table(extract_table)

            with hyp.Inserter(connection, extract_table) as inserter:
                for record in owner_records:
                    row = [record['workbook_name'], record['owner']]
                    inserter.add_row(row)
                # Send the buffered rows to Hyper in one go.
                inserter.execute()
            print("The data was added to the table.")

        print("The connection to the Hyper extract file is closed.")

    print("The HyperProcess has shut down.")
def _insert_frame(
    df: pd.DataFrame,
    *,
    connection: tab_api.Connection,
    table: pantab_types.TableType,
    table_mode: str,
) -> None:
    """Insert the rows of ``df`` into ``table`` over ``connection``.

    ``table_mode`` is checked by ``_validate_table_mode``. A ``str`` table is
    wrapped in a ``tab_api.TableName``. When ``table_mode`` is ``"a"`` and the
    table already exists, the frame's columns are compared against the
    existing definition with ``_assert_columns_equal``; otherwise the schema
    (if the table name carries one) and the table are created if they do not
    exist yet. Rows are written through ``libwriter.write_to_hyper``.
    """
    _validate_table_mode(table_mode)

    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Derive one Hyper column definition per DataFrame column.
    columns: List[tab_api.TableDefinition.Column] = []
    for col_name, dtype in df.dtypes.items():
        tableau_type = _pandas_to_tableau_type(dtype.name)
        hyper_column = tab_api.TableDefinition.Column(
            name=col_name,
            type=tableau_type.type_,
            nullability=tableau_type.nullability,
        )
        columns.append(hyper_column)

    catalog = connection.catalog
    appending_to_existing = table_mode == "a" and catalog.has_table(table)
    if appending_to_existing:
        # Existing table: its structure must match the frame.
        table_def = catalog.get_table_definition(table)
        _assert_columns_equal(columns, table_def.columns)
    else:
        # New table, potentially in a new schema.
        table_def = tab_api.TableDefinition(table)
        for hyper_column in columns:
            table_def.add_column(hyper_column)

        if isinstance(table, tab_api.TableName) and table.schema_name:
            catalog.create_schema_if_not_exists(table.schema_name)

        catalog.create_table_if_not_exists(table_def)

    # The null mask is taken before any conversion of the frame.
    null_mask = np.ascontiguousarray(pd.isnull(df))
    # Special handling for conversions
    df, dtypes = _maybe_convert_timedelta(df)

    with tab_api.Inserter(connection, table_def) as inserter:
        # HACK: there is no known cleaner way to obtain the memory address of
        # the cdata object at runtime. Its repr looks like
        #   <cdata 'hyper_inserter_buffer_t *' 0x7f815192ec60>
        # so strip the trailing '>' and parse the last token as hexadecimal.
        # ffi.addressof did not work because this is an opaque pointer.
        buffer_repr = str(inserter._buffer)
        hex_token = buffer_repr[:-1].split()[-1]
        address = int(hex_token, base=16)

        rows = df.itertuples(index=False, name=None)
        n_columns = df.shape[1]
        libwriter.write_to_hyper(rows, null_mask, address, n_columns, dtypes)
        inserter.execute()
def _insert_frame(
    df: pd.DataFrame,
    *,
    connection: tab_api.Connection,
    table: pantab_types.TableType,
    table_mode: str,
) -> None:
    """Insert the rows of ``df`` into ``table`` over ``connection``.

    ``table_mode`` is checked by ``_validate_table_mode``. A ``str`` table is
    wrapped in a ``tab_api.TableName``. When ``table_mode`` is ``"a"`` and the
    table already exists, the frame's columns are compared against the
    existing definition with ``_assert_columns_equal``; otherwise the schema
    (if the table name carries one) and the table are created if they do not
    exist yet. Rows are written through ``libwriter.write_to_hyper``, which
    receives the inserter's internal buffer.
    """
    _validate_table_mode(table_mode)

    if isinstance(table, str):
        table = tab_api.TableName(table)

    # Derive one Hyper column definition per DataFrame column.
    columns: List[tab_api.TableDefinition.Column] = []
    for col_name, dtype in df.dtypes.items():
        tableau_type = _pandas_to_tableau_type(dtype.name)
        hyper_column = tab_api.TableDefinition.Column(
            name=col_name,
            type=tableau_type.type_,
            nullability=tableau_type.nullability,
        )
        columns.append(hyper_column)

    catalog = connection.catalog
    appending_to_existing = table_mode == "a" and catalog.has_table(table)
    if appending_to_existing:
        # Existing table: its structure must match the frame.
        table_def = catalog.get_table_definition(table)
        _assert_columns_equal(columns, table_def.columns)
    else:
        # New table, potentially in a new schema.
        table_def = tab_api.TableDefinition(table)
        for hyper_column in columns:
            table_def.add_column(hyper_column)

        if isinstance(table, tab_api.TableName) and table.schema_name:
            catalog.create_schema_if_not_exists(table.schema_name)

        catalog.create_table_if_not_exists(table_def)

    # The null mask is taken before any conversion of the frame.
    null_mask = np.ascontiguousarray(pd.isnull(df))
    # Special handling for conversions
    df, dtypes = _maybe_convert_timedelta(df)

    with tab_api.Inserter(connection, table_def) as inserter:
        rows = df.itertuples(index=False, name=None)
        n_columns = df.shape[1]
        libwriter.write_to_hyper(
            rows, null_mask, inserter._buffer, n_columns, dtypes)
        inserter.execute()
def main(argv: Optional[List[str]] = None) -> int:
    """Convert one graph JSON file into the ``altimeter.hyper`` database.

    Args:
        argv: Command-line arguments; ``sys.argv[1:]`` is used when ``None``.
            Expected to hold one or more input JSON file paths.

    Returns:
        ``0`` once all tables have been created and populated.

    Raises:
        NotImplementedError: If more than one input path is supplied.
    """
    cli_args = sys.argv[1:] if argv is None else argv

    parser = argparse.ArgumentParser()
    parser.add_argument("input_json_filepaths", type=Path, nargs="+")
    input_json_filepaths = parser.parse_args(cli_args).input_json_filepaths

    if len(input_json_filepaths) > 1:
        raise NotImplementedError("Only one input supported at this time")

    # Map scan ids to GraphSets; together they hold all of the data found in
    # the provided input.
    scan_ids_graph_sets: Dict[int, GraphSet] = {}
    for scan_id, filepath in enumerate(input_json_filepaths):
        scan_ids_graph_sets[scan_id] = GraphSet.from_json_file(filepath)

    # Discover the tables to create by iterating over resources and finding
    # the maximum set of predicates used for each type.
    table_defns = build_table_defns(scan_ids_graph_sets.values())

    # Build the row data for every table.
    table_names_datas = build_data(scan_ids_graph_sets.values(), table_defns)

    hyper_tables: Dict[str, tableauhyperapi.TableDefinition] = {}
    telemetry = tableauhyperapi.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
    with tableauhyperapi.HyperProcess(telemetry=telemetry) as hyper, \
            tableauhyperapi.Connection(
                endpoint=hyper.endpoint,
                database="altimeter.hyper",
                create_mode=tableauhyperapi.CreateMode.CREATE_AND_REPLACE,
            ) as connection:
        # Create every table before any data is inserted.
        for table_name, columns in table_defns.items():
            hyper_columns = [column.to_hyper() for column in columns]
            hyper_table = tableauhyperapi.TableDefinition(
                table_name=table_name, columns=hyper_columns)
            connection.catalog.create_table(hyper_table)
            hyper_tables[table_name] = hyper_table

        # Fill each table with its rows.
        for table_name, datas in table_names_datas.items():
            target_table = hyper_tables[table_name]
            with tableauhyperapi.Inserter(connection,
                                          target_table) as inserter:
                inserter.add_rows(datas)
                inserter.execute()

    return 0
# This utility script helps generate Hyper files which can be
# incorporated into testing
import tableauhyperapi as tab_api

if __name__ == "__main__":
    # A single text column that does not accept NULLs.
    table = tab_api.TableDefinition(
        table_name=tab_api.TableName("public", "table"),
        columns=[
            tab_api.TableDefinition.Column(
                name="Non-Nullable String",
                type=tab_api.SqlType.text(),
                nullability=tab_api.NOT_NULLABLE,
            ),
        ],
    )

    with tab_api.HyperProcess(
        telemetry=tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
    ) as hyper, tab_api.Connection(
        endpoint=hyper.endpoint,
        database="non_pantab_writeable.hyper",
        create_mode=tab_api.CreateMode.CREATE_AND_REPLACE,
    ) as connection:
        connection.catalog.create_table(table_definition=table)

        # Populate the table with two sample rows.
        with tab_api.Inserter(connection, table) as inserter:
            inserter.add_rows([["row1"], ["row2"]])
            inserter.execute()