def to_tableau(self, path, add_index=False):
    '''
    Converts a Pandas DataFrame to a Tableau .tde file
    @param path = path to write file
    @param add_index = adds incrementing integer index before dataframe columns
    '''
    # Delete extract and debug logs if they already exist
    for file in [path, os.path.dirname(path) + '/debug.log', './DataExtract.log', './debug.log']:
        if os.path.isfile(file):
            os.remove(file)

    # Create Extract and Table
    ExtractAPI.initialize()
    new_extract = Extract(path)
    table_def = TableDefinition()

    # Set columns in Tableau
    if add_index:
        table_def.addColumn('index', Type.INTEGER)
    for col_index, col_name in enumerate(self._dataframe):
        table_def.addColumn(col_name, self._column_static_type[col_index])

    # Create table
    new_table = new_extract.addTable("Extract", table_def)

    # Set column values
    self.set_column_values(new_table, table_def, add_index)

    # Close extract
    new_extract.close()
    ExtractAPI.cleanup()
def createOrOpenExtract(filename, useSpatial):
    try:
        # Create the Extract object
        # (NOTE: The Extract constructor opens an existing extract with the
        # given filename if one exists, or creates a new extract with the given
        # filename if one does not)
        extract = Extract(filename)

        # Define the table schema (if we are creating a new extract)
        # (NOTE: in the Tableau Data Engine, all tables must be named 'Extract')
        if (not extract.hasTable('Extract')):
            schema = TableDefinition()
            schema.setDefaultCollation(Collation.EN_GB)
            schema.addColumn('Purchased', Type.DATETIME)
            schema.addColumn('Product', Type.CHAR_STRING)
            schema.addColumn('uProduct', Type.UNICODE_STRING)
            schema.addColumn('Price', Type.DOUBLE)
            schema.addColumn('Quantity', Type.INTEGER)
            schema.addColumn('Taxed', Type.BOOLEAN)
            schema.addColumn('Expiration Date', Type.DATE)
            schema.addColumnWithCollation('Produkt', Type.CHAR_STRING, Collation.DE)
            if (useSpatial):
                schema.addColumn('Destination', Type.SPATIAL)
            table = extract.addTable('Extract', schema)
            if (table == None):
                print 'A fatal error occurred while creating the table:\nExiting now.'
                exit(-1)
    except TableauException, e:
        print 'A fatal error occurred while creating the new extract:\n', e, '\nExiting now.'
        exit(-1)
class TDEExport(object): @staticmethod def make_table_definition(schema): table_def = TableDefinition() table_def.setDefaultCollation(Collation.EN_GB) for col in schema: table_def.addColumn(col['name'], typeMap.get(col["type"],Type.UNICODE_STRING)) return table_def def __init__(self, tde_file_path, input_schema): self.tde_file_path = tde_file_path self.output_path = os.path.dirname(self.tde_file_path) self.input_schema = input_schema self.errors = 0 self.nrows = 0 print "Writing TDE file: %s" % self.tde_file_path ExtractAPI.initialize() self.extract = Extract(self.tde_file_path) assert(not self.extract.hasTable('Extract')) self.table_def = TDEExport.make_table_definition(self.input_schema) self.table = self.extract.addTable('Extract', self.table_def) def __enter__(self): return self def __exit__(self, type, value, tb): self.close(); def close(self): self.extract.close() ExtractAPI.cleanup() if self.errors > 0: print "Encountered %d errors" % self.errors def insert_array_row(self, input_row): # Work around bug in DSS 3.1.0 API for single-column datasets if len(input_row) == 0 and len(self.input_schema) == 1: input_row = [u''] nb_cols = len(self.input_schema) output_row = Row(self.table_def) for col_no in range(nb_cols): col = self.input_schema[col_no] data = input_row[col_no] try: fieldSetterMap[col['type']](output_row, col_no, data) except Exception, e: self.errors += 1 if self.errors < 100 or (self.errors < 10000 and self.errors % 100 == 0) or (self.errors % 1000 ==0): try: print "[err #%s] Failed setting: col=%s type=%s val=%s err=%s" % (self.errors, col["name"], col["type"], data, e) except Exception, e2: print "[err #%s] Failed setting: col=%s type=%s val=NON_PRINTED err=%s" % (self.errors, col["name"], col["type"],e)
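Both TDEExport variants (here and below) reference a typeMap and a fieldSetterMap that the snippets never define. A minimal sketch of what those lookup tables could look like, assuming DSS-style schema type names; the keys and setters in the real plugin may differ:

# Hypothetical lookup tables assumed by TDEExport; keys are illustrative only.
typeMap = {
    'int':     Type.INTEGER,
    'bigint':  Type.INTEGER,
    'float':   Type.DOUBLE,
    'double':  Type.DOUBLE,
    'boolean': Type.BOOLEAN,
    'string':  Type.UNICODE_STRING,
}

fieldSetterMap = {
    'int':     lambda row, no, v: row.setInteger(no, int(v)),
    'bigint':  lambda row, no, v: row.setInteger(no, int(v)),
    'float':   lambda row, no, v: row.setDouble(no, float(v)),
    'double':  lambda row, no, v: row.setDouble(no, float(v)),
    'boolean': lambda row, no, v: row.setBoolean(no, v in (True, 'true', 'True')),
    'string':  lambda row, no, v: row.setString(no, v),
}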
class TDEExport(object):
    @staticmethod
    def convert_type(type):
        return typeMap.get(type, Type.UNICODE_STRING)

    @staticmethod
    def make_table_definition(schema):
        table_def = TableDefinition()
        table_def.setDefaultCollation(Collation.EN_GB)
        for col in schema:
            table_def.addColumn(col['name'], TDEExport.convert_type(col['type']))
        return table_def

    def __init__(self, tde_file_path, input_schema):
        self.tde_file_path = tde_file_path
        self.output_path = os.path.dirname(self.tde_file_path)
        self.input_schema = input_schema
        print "Writing TDE file: %s" % self.tde_file_path
        ExtractAPI.initialize()
        self.extract = Extract(self.tde_file_path)
        assert(not self.extract.hasTable('Extract'))
        self.table_def = TDEExport.make_table_definition(self.input_schema)
        self.table = self.extract.addTable('Extract', self.table_def)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        self.extract.close()
        ExtractAPI.cleanup()

    def insert_array_row(self, input_row):
        # Work around bug in DSS 3.1.0 API for single-column datasets
        if len(input_row) == 0 and len(self.input_schema) == 1:
            input_row = [u'']
        nb_cols = len(self.input_schema)
        output_row = Row(self.table_def)
        for col_no in range(nb_cols):
            col = self.input_schema[col_no]
            data = input_row[col_no]
            try:
                fieldSetterMap[col['type']](output_row, col_no, data)
            except Exception, e:
                print "Failed setting field %s to value %s: %s" % (col["name"], data, e)
                pass
        self.table.insert(output_row)
def __init__(self, tde_file_path, input_schema):
    self.tde_file_path = tde_file_path
    self.output_path = os.path.dirname(self.tde_file_path)
    self.input_schema = input_schema
    self.errors = 0
    self.nrows = 0
    print "Writing TDE file: %s" % self.tde_file_path
    ExtractAPI.initialize()
    self.extract = Extract(self.tde_file_path)
    assert(not self.extract.hasTable('Extract'))
    self.table_def = TDEExport.make_table_definition(self.input_schema)
    self.table = self.extract.addTable('Extract', self.table_def)
def new_tde(self, data):
    # Step 1: Create the extract file
    dataExtract = Extract(self.tde_name)
    if dataExtract.hasTable('Extract'):
        print("tde already exists, use another name")
        return

    # Step 2: Create the table definition
    cols = data.columns.tolist()
    dataSchema = TableDefinition()
    for col in cols:
        dataSchema.addColumn(col, Type.UNICODE_STRING)

    # Step 3: Create a table in the image of the table definition
    table = dataExtract.addTable('Extract', dataSchema)

    # Step 4: Create rows and insert them one by one
    newRow = Row(dataSchema)
    for i in range(0, len(data)):
        for col_index, col in enumerate(cols):
            newRow.setString(col_index, data[col][i])
        table.insert(newRow)

    # Step 5: Close the tde
    dataExtract.close()
def tde_export():
    print "Start export to TDE"
    input_name = get_input_names_for_role('input')[0]
    input_dataset = dataiku.Dataset(input_name)
    input_schema = input_dataset.read_schema()
    partitions = input_dataset.list_partitions(raise_if_empty=False)
    if partitions not in [[], [u'NP']]:
        raise Exception(
            "Due to the current APIs, this plugin cannot support partitioned input "
            "(and it seems the input dataset " + input_name + " is partitioned). "
            "A workaround is to first run a sync recipe "
            "from " + input_name + " into a non partitioned dataset, "
            "then take the latter as input for tde export.")

    output_name = get_output_names_for_role('output_folder')[0]
    output_folder = dataiku.Folder(output_name)
    output_path = output_folder.get_path()
    os.chdir(output_path)

    # Clean output dir. We assume there is no subfolder
    # (because this recipe never creates one; if there is, better to fail
    # than remove someone else's data).
    for file in os.listdir(output_path):
        os.remove(file)

    ExtractAPI.initialize()
    with Extract(output_filename()) as extract:
        assert (not extract.hasTable('Extract'))
        tableDef = makeTableDefinition(input_schema)
        table = extract.addTable('Extract', tableDef)
        insertData(input_dataset, table)
        extract.close()
    ExtractAPI.cleanup()
    print "End export to TDE"
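tde_export leans on three helpers defined elsewhere in the plugin: output_filename, makeTableDefinition, and insertData. A plausible sketch, assuming a DSS-style schema of {'name', 'type'} dicts and the typeMap idea sketched earlier; the real implementations may differ:

# Hypothetical helpers for tde_export; names match the calls above, bodies are guesses.
def output_filename():
    return 'export.tde'  # assumed fixed name inside the output folder

def makeTableDefinition(schema):
    table_def = TableDefinition()
    for col in schema:
        table_def.addColumn(col['name'], typeMap.get(col['type'], Type.UNICODE_STRING))
    return table_def

def insertData(dataset, table):
    table_def = table.getTableDefinition()
    for input_row in dataset.iter_tuples():  # dataiku's tuple iterator
        row = Row(table_def)
        for col_no, value in enumerate(input_row):
            if value is None:
                row.setNull(col_no)
            else:
                row.setString(col_no, unicode(value))  # simplistic: write everything as text
        table.insert(row)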
def __init__(self, extract_file_path, input_schema):
    self.extract_file_path = extract_file_path
    self.output_path = os.path.dirname(self.extract_file_path)
    self.input_schema = input_schema
    self.format_type = TABLEAU_FORMAT
    self.errors = 0
    self.nrows = 0
    print("Writing Extract {} file: {}".format(self.format_type, self.extract_file_path))
    ExtractAPI.initialize()
    self.extract = Extract(self.extract_file_path)
    assert (not self.extract.hasTable('Extract'))
    self.table_def = TableauExport.make_table_definition(self.input_schema)
    self.table = self.extract.addTable('Extract', self.table_def)
def __init__(self, tde_file_path, input_schema):
    self.tde_file_path = tde_file_path
    self.output_path = os.path.dirname(self.tde_file_path)
    self.input_schema = input_schema
    print "Writing TDE file: %s" % self.tde_file_path
    ExtractAPI.initialize()
    self.extract = Extract(self.tde_file_path)
    assert(not self.extract.hasTable('Extract'))
    self.table_def = TDEExport.make_table_definition(self.input_schema)
    self.table = self.extract.addTable('Extract', self.table_def)
def to_tableau(self, path, table_name='Extract', add_index=False): """ Converts a Pandas DataFrame to a Tableau .tde file @param path = path to write file @param table_name = name of the table in the extract @param add_index = adds incrementing integer index before dataframe columns """ # Delete Extract and debug log is already exist for file in [path, os.path.dirname(path) + '/debug.log', './DataExtract.log', './debug.log']: if os.path.isfile(file): os.remove(file) # Create Extract and Table ExtractAPI.initialize() new_extract = Extract(path) table_def = TableDefinition() # Set columns in Tableau if add_index: index_col = 'index' suffix = 1 while index_col in self._dataframe.columns: index_col = '%s_%s' % (index_col, suffix) suffix += 1 table_def.addColumn(index_col, Type.INTEGER) for col_index, col_name in enumerate(self._dataframe): table_def.addColumn(col_name, self._column_static_type[col_index]) # Create table new_table = new_extract.addTable(table_name, table_def) # Set Column values self.set_column_values(new_table, table_def, add_index) # Close extract new_extract.close() ExtractAPI.cleanup()
def create_tde(self, tde_filename, append=False):
    try:
        # Using "with" handles closing the TDE correctly
        with Extract("{}".format(tde_filename)) as extract:
            self.tde_object = None
            row_count = 0
            # Create the Extract object (or set it for updating) if there are actually results
            if not extract.hasTable('Extract'):
                # Table does not exist; create it
                self.log(u'Creating Extract with table definition')
                self.tde_object = extract.addTable('Extract', self.table_definition)
            else:
                # Open an existing table to add more rows
                if append is True:
                    self.tde_object = extract.openTable('Extract')
                else:
                    self.log("Output file '" + tde_filename + "' already exists.")
                    self.log("Append mode is off, please delete file and then rerun...")
                    sys.exit()

            # This is if you actually have data to put into the extract. Implement later.
            # tde_row = Row(tableDef)
            # colNo = 0
            # for field in db_row:
            #     # Possible for database to have types that do not map; we skip them
            #     if cursor.description[colNo][1].__name__ in PyTypeMap:
            #         if field == "" or field == None:
            #             tde_row.setNull(colNo)
            #         else:
            #             # From any given row from the cursor object, we can use the cursor.description
            #             # collection to find information, for example the column names and the datatypes.
            #             # [0] is the column name string, [1] is the python type object (mirrors
            #             # cursor.description at the Row level). The second item is a Python type object;
            #             # to get its name as a string for comparison, use the __name__ property.
            #             fieldSetterMap[PyTypeMap[cursor.description[colNo][1].__name__]](tde_row, colNo, field)
            #     colNo += 1
            # table.insert(tde_row)
            # row_count += 1
            # print "TDE creation complete, " + str(row_count) + " rows inserted\n"
            # if len(skipped_cols) > 0:
            #     print "The following columns were skipped due to datatypes that were not recognized:\n"
            #     print skipped_cols
    except TableauException, e:
        self.log('Tableau TDE creation error:{}'.format(e))
        raise
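The commented-out block in create_tde sketches how rows from a DB-API cursor would be written. A self-contained version of that loop, assuming the PyTypeMap and fieldSetterMap tables the comments mention (Python type name -> TDE type -> row setter); this is a sketch of the described approach, not the original's implementation:

def insert_cursor_rows(cursor, table):
    # Hypothetical realization of the commented-out loop above, assuming
    # PyTypeMap and fieldSetterMap exist as the comments describe.
    table_def = table.getTableDefinition()
    row_count = 0
    for db_row in cursor:
        tde_row = Row(table_def)
        for col_no, field in enumerate(db_row):
            py_type_name = cursor.description[col_no][1].__name__
            if py_type_name not in PyTypeMap:
                continue  # skip columns whose python type has no TDE mapping
            if field == "" or field is None:
                tde_row.setNull(col_no)
            else:
                fieldSetterMap[PyTypeMap[py_type_name]](tde_row, col_no, field)
        table.insert(tde_row)
        row_count += 1
    return row_count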
def writeTableToTDE(pgFields, pgData, extractLocation):
    print("writing table to {0}".format(extractLocation))

    # 1. Initialize a new extract
    ExtractAPI.initialize()

    # 2. Create a table definition
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    for i in range(0, len(pgFields)):
        table_definition.addColumn(pgFields[i][0], pgFields[i][1])

    # 4. Initialize a new table in the extract
    # a. check if the table already exists
    # Only add the table if it doesn't already exist
    if (new_extract.hasTable('Extract') == False):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create a new row
    new_row = Row(table_definition)  # Pass the table definition to the constructor

    # 6. Populate each new row
    numberRecords = len(pgData)
    for i in range(0, numberRecords):
        # Note that this doesn't cover all possible TDE data types
        for j in range(0, len(pgFields)):
            if pgFields[j][1] == Type.INTEGER:
                new_row.setInteger(j, pgData[i][j])
            elif pgFields[j][1] == Type.UNICODE_STRING:
                new_row.setString(j, pgData[i][j])
            elif pgFields[j][1] == Type.SPATIAL:
                new_row.setSpatial(j, pgData[i][j])
            elif pgFields[j][1] == Type.BOOLEAN:
                new_row.setBoolean(j, pgData[i][j])
            elif pgFields[j][1] == Type.DOUBLE:
                new_row.setDouble(j, pgData[i][j])
        new_table.insert(new_row)  # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the Extract API
    ExtractAPI.cleanup()
    return
def publishCsvDatasource(serverURL, siteName, username, password, filename):
    extractName = os.path.basename(filename).split('.', 2)[0]
    extractFilename = extractName + '.tde'
    try:
        os.remove(extractFilename)
    except OSError:
        pass

    ExtractAPI.initialize()
    with Extract(extractFilename) as extract:
        with open(filename) as csvfile:
            fileReader = csv.reader(csvfile, delimiter=',')
            headers = []
            table = None
            for fileRow in fileReader:
                print('Reading: ' + str(fileRow))
                if headers == []:
                    headers = fileRow
                else:
                    if table == None:
                        tableDef = makeTableDefinition(headers, fileRow)
                        table = extract.addTable('Extract', tableDef)
                    insertData(table, fileRow)
            csvfile.close()
        extract.close()
    ExtractAPI.cleanup()

    # Initialize Tableau Server API
    ServerAPI.initialize()
    serverConnection = ServerConnection()
    serverConnection.connect(serverURL, username, password, siteName)

    # Publish the extract to the server under the default project
    serverConnection.publishExtract(extractFilename, 'default', extractName, False)

    # Disconnect from the server
    serverConnection.disconnect()

    # Destroy the server connection object
    serverConnection.close()

    # Clean up Tableau Server API
    ServerAPI.cleanup()
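publishCsvDatasource calls makeTableDefinition(headers, fileRow) and insertData(table, fileRow), which aren't shown here. A minimal sketch under the assumption that every CSV column is treated as a unicode string (the original presumably infers types from the first data row):

# Hypothetical helpers for the CSV publisher above; all columns become strings.
def makeTableDefinition(headers, firstRow):
    tableDef = TableDefinition()
    for header in headers:
        tableDef.addColumn(header, Type.UNICODE_STRING)
    return tableDef

def insertData(table, fileRow):
    tableDef = table.getTableDefinition()
    row = Row(tableDef)
    for col_no, value in enumerate(fileRow):
        row.setString(col_no, value.decode('utf-8'))  # Python 2 csv yields byte strings
    table.insert(row)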
def to_tableau(self, path, table_name='Extract', add_index=False):
    """
    Converts a Pandas DataFrame to a Tableau .tde file
    @param path = path to write file
    @param table_name = name of the table in the extract
    @param add_index = adds incrementing integer index before dataframe columns
    """
    # Delete dataextract log and hyper_db temp files if they already exist
    files = (glob('DataExtract*.log') + glob('hyper_db_*') + [
        os.path.dirname(path) + '/debug.log',
        './DataExtract.log',
        './debug.log'
    ])
    for file in files:
        if os.path.isfile(file):
            os.remove(file)

    # Create Extract and Table
    ExtractAPI.initialize()
    new_extract = Extract(path)
    if not new_extract.hasTable(table_name):
        print('Table \'{}\' does not exist in extract {}, creating.'.format(table_name, path))
        self.set_table_structure(new_extract, table_name, add_index)
    new_table = new_extract.openTable(table_name)
    table_def = new_table.getTableDefinition()

    # Set column values
    self.set_column_values(new_table, table_def, add_index)

    # Close extract
    new_extract.close()
    ExtractAPI.cleanup()
for i in join_info:
    # joinedDF = pd.merge(outputDF, VPS_DF, on='IntubationCycle', how='left')
    joinDF_auto = pd.merge(joinDF_auto, i[0], on=i[1], how='inner')

# output to CSV
joinDF_auto.to_csv(output_CSV, index=False, encoding='utf-8')

# typecast to date format
joinDF_auto['ClaimDate'] = pd.to_datetime(joinDF_auto['ClaimDate'])

#------------------------------------------------------------------------------
# Create Extract
#------------------------------------------------------------------------------
ExtractAPI.initialize()
new_extract = Extract(output_Extract)

# Create a new table definition with 3 columns
table_definition = TableDefinition()
table_definition.addColumn('Patient_ID', Type.INTEGER)           # column 0
table_definition.addColumn('ClaimDate', Type.DATE)               # column 1
table_definition.addColumn('Patient Name', Type.UNICODE_STRING)  # column 2
#table_definition.addColumn('Claim_ID', Type.INTEGER)            # column 3
#table_definition.addColumn('Discount', Type.DOUBLE)             # column 4

#------------------------------------------------------------------------------
# Populate Extract
#------------------------------------------------------------------------------
new_table = new_extract.addTable('Extract', table_definition)
DEST_LON = 4
SEG_NAME = 9
DEP_TIME = 6  # Departure time (minutes from midnight)
DEP_TIME_HRS = 0
FLIGHT_NUM = 2
FLIGHT_TIME = 5

#####################################################################
## Process the data and write the TDE
#####################################################################

# 1. Initialize a new extract
ExtractAPI.initialize()

# 2. Create a table definition
new_extract = Extract(extractLocation)

# 3. Add column definitions to the table definition
table_definition = TableDefinition()
table_definition.addColumn('route', Type.UNICODE_STRING)  # column 0
table_definition.addColumn('latitude', Type.DOUBLE)
table_definition.addColumn('longitude', Type.DOUBLE)
table_definition.addColumn('point_number', Type.INTEGER)
table_definition.addColumn('distance_km', Type.DOUBLE)
table_definition.addColumn('FlightNumber', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTime', Type.UNICODE_STRING)
table_definition.addColumn('DepartureTimeFromMidnight', Type.UNICODE_STRING)
table_definition.addColumn('SegMinFromMidnight', Type.INTEGER)
table_definition.addColumn('FlightTime', Type.INTEGER)
table_definition.addColumn('SegTimeFromMidnight', Type.UNICODE_STRING)
def create(target_filepath, dataframe):
    column = []
    # Map pandas DataFrame dtypes (left) to TDE data types (right).
    fieldMap = {
        'float64': tde.Types.Type.DOUBLE,
        'float32': tde.Types.Type.DOUBLE,
        'int64': tde.Types.Type.INTEGER,
        'int8': tde.Types.Type.INTEGER,
        'object': tde.Types.Type.UNICODE_STRING,
        'bool': tde.Types.Type.BOOLEAN,
        'datetime64[ns]': tde.Types.Type.DATETIME
    }

    # If the TDE already exists, delete it.
    if os.path.isfile(target_filepath):
        os.remove(target_filepath)

    tdefile = tde.Extract(target_filepath)
    schema = tde.TableDefinition()  # define the table definition
    colnames = dataframe.columns    # all dataframe column names
    coltypes = dataframe.dtypes     # all dataframe column dtypes

    # Build the TDE schema from the DataFrame
    for i in range(0, len(colnames)):
        cname = colnames[i]
        ctype = fieldMap.get(str(coltypes[i]))
        schema.addColumn(cname, ctype)  # add one column (name, data type)
        column.append(cname)

    # Open the TDE file and write the data
    with tdefile as extract:
        table = extract.addTable("Extract", schema)
        for r in range(0, dataframe.shape[0]):
            row = tde.Row(schema)
            # Show the remaining row count
            count = dataframe.shape[0] - r - 1
            print(str(count) + ' rows remaining', end="\r")
            # Write each value into the TDE according to its dtype
            for c in range(0, len(coltypes)):
                if str(coltypes[c]) == 'float64':
                    if dataframe.iloc[r, c] is None:
                        row.setNull(c)
                    else:
                        row.setDouble(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'float32':
                    row.setDouble(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'int64':
                    if dataframe.iloc[r, c] is None:
                        row.setNull(c)
                    else:
                        row.setInteger(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'int8':
                    row.setInteger(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'object':
                    if dataframe.iloc[r, c] is None:
                        row.setNull(c)
                    else:
                        row.setString(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'bool':
                    row.setBoolean(c, dataframe.iloc[r, c])
                elif str(coltypes[c]) == 'datetime64[ns]':
                    try:
                        dt = dataframe.iloc[r, c]
                        row.setDateTime(c, dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, 0)
                    except:
                        row.setNull(c)
                else:
                    row.setNull(c)
            # Insert the row
            table.insert(row)

    # Close the tdefile
    tdefile.close()
def main():
    pp = pprint.PrettyPrinter(indent=2)
    key = raw_input("API Key: ")
    limit = 100
    offset = 0

    # Create TDE extract file
    if os.path.isfile(extract_file):
        os.remove(extract_file)
    ExtractAPI.initialize()
    tdefile = Extract(extract_file)

    # Create the table definition
    # Data types: INTEGER, DOUBLE, BOOLEAN, DATE, DATETIME, DURATION, CHAR_STRING, UNICODE_STRING
    tableDef = TableDefinition()
    tableDef.addColumn('alert_id', Type.CHAR_STRING)
    tableDef.addColumn('timestamp', Type.DATETIME)
    tableDef.addColumn('alert_type', Type.CHAR_STRING)
    tableDef.addColumn('alert_status', Type.CHAR_STRING)
    tableDef.addColumn('alert_network', Type.CHAR_STRING)
    tableDef.addColumn('notes', 16)  # UNICODE_STRING
    tableDef.addColumn('metadata', 16)  # UNICODE_STRING
    tableDef.addColumn('rule_name', Type.CHAR_STRING)
    tableDef.addColumn('severity', Type.INTEGER)
    tableDef.addColumn('asset_name', 16)  # UNICODE_STRING
    tableDef.addColumn('asset_image', Type.CHAR_STRING)
    tableDef.addColumn('perpetrator_username', 16)  # UNICODE_STRING
    tableDef.addColumn('perpetrator_displayname', 16)  # UNICODE_STRING
    tableDef.addColumn('perpetrator_type', Type.CHAR_STRING)
    tableDef.addColumn('perpetrator_image', Type.CHAR_STRING)
    tableDef.addColumn('takedown_request_time', Type.DATETIME)
    tableDef.addColumn('takedown_accept_time', Type.DATETIME)
    tableDef.addColumn('close_time', Type.DATETIME)

    # Create table in image of tableDef
    table = tdefile.addTable('Extract', tableDef)

    rowcount = 0
    done = 'n'
    while done == 'n':
        response = get_page(key, limit, offset)
        alerts = response['alerts']
        if len(alerts) == 0:
            done = 'y'
            break
        for alert in alerts:
            newrow = createrow(alert, tableDef)
            table.insert(newrow)
            rowcount += 1
        if rowcount >= LIMIT:
            done = 'y'
            break
        offset += 100

    tdefile.close()
    ExtractAPI.cleanup()
    print str(rowcount) + " alerts processed"
        pass

###Setting Tableau recognized data types###
for i in column_types:
    if i is numpy.int64:
        column_headers_types.append(Type.INTEGER)
    elif i is numpy.float64:
        column_headers_types.append(Type.DOUBLE)
    elif i is pd.Timestamp:
        column_headers_types.append(Type.DATETIME)
    else:
        column_headers_types.append(Type.UNICODE_STRING)

###Getting the existing table schema for incremental refresh###
if incremental_refresh == 1:
    dataExtract = Extract(extract_name)
    if dataExtract.hasTable('Extract'):
        table = dataExtract.openTable('Extract')
        dataSchema = table.getTableDefinition()
else:
    ###Removing extract if it already exists (for full refresh)###
    try:
        cwd = os.getcwd()
        for f in os.listdir(cwd):
            if re.search(extract_name, f):
                os.remove(os.path.join(cwd, f))
    except OSError, e:
        pass

###Initializing the Extract API, and applying the schema to the table###
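The snippet above cuts off at its last comment. A plausible continuation for the full-refresh branch, reusing the names above plus a column_headers list that the fragment implies but does not show; this is a guess at the truncated code, not the original:

# Hypothetical continuation of the truncated snippet above.
ExtractAPI.initialize()
dataExtract = Extract(extract_name)
if not dataExtract.hasTable('Extract'):
    dataSchema = TableDefinition()
    for name, tde_type in zip(column_headers, column_headers_types):  # column_headers is assumed
        dataSchema.addColumn(name, tde_type)
    table = dataExtract.addTable('Extract', dataSchema)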
def createTDE(query, dbname, tde_filename):
    start_time = time.time()
    db = sql.connect(host="127.0.0.1", port=9999, user="******", db=dbname)
    cursor = db.cursor()
    cursor.execute(query)

    ExtractAPI.initialize()
    if os.path.isfile(tde_filename):
        os.remove(tde_filename)
    tde = Extract(tde_filename)
    table_def = makeTableDefinition(cursor.description)
    tde_table = tde.addTable('Extract', table_def)
    tde_row = Row(table_def)  # Pass the table definition to the constructor

    size = 1000
    many = cursor.fetchmany(size)
    rows_counter = 0  # count rows in case of database disconnection
    while many:
        for row in many:
            # Create new row
            for colno, col in enumerate(row):
                col_type = table_def.getColumnType(colno)
                setType = tde_set_type_function_map[col_type]
                # print setType
                # print type(setType)
                setType(tde_row, colno, col)
            tde_table.insert(tde_row)
            rows_counter += 1
        many = cursor.fetchmany(size)
    # tde_row.close()
    print "cleared while loop"

    # Set column values. The first parameter is the column number (its ordinal
    # position); the second and subsequent parameters are the value(s) to set
    # the column to.
    # new_row.setInteger(0, 1)
    # new_row.setString(1, 'Amanda')
    # new_row.setDate(2, 2014, 6, 9)
    # new_table.insert(new_row)  # Add the new row to the table
    # new_row.setInteger(0, 2)
    # new_row.setString(1, 'Brian')
    # new_row.setDate(2, 2014, 10, 13)
    # new_table.insert(new_row)
    # new_row.setInteger(0, 3)
    # new_row.setString(1, 'Christina')
    # new_row.setDate(2, 2015, 2, 16)
    # new_table.insert(new_row)

    # Close the extract in order to save the .tde file and clean up resources
    tde.close()
    print "TDE Closed"
    ExtractAPI.cleanup()
    print "ExtractAPI cleaned up"
    cursor.close()
    print "cursor Closed"
    db.close()
    print "db closed"

    timetaken = time.time() - start_time
    print str(rows_counter) + ' rows inserted in ' + str(timetaken) + ' seconds'
    print ' (' + str(rows_counter / timetaken) + ' rows per second)'
    return
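createTDE depends on makeTableDefinition(cursor.description) and tde_set_type_function_map, neither of which is shown. A hedged sketch: the map's values match how setType(tde_row, colno, col) is called above, and the table definition is deliberately simplistic (all strings); the real script's type inference is surely richer:

# Hypothetical lookups for createTDE.
tde_set_type_function_map = {
    Type.INTEGER:        Row.setInteger,
    Type.DOUBLE:         Row.setDouble,
    Type.BOOLEAN:        Row.setBoolean,
    Type.UNICODE_STRING: Row.setString,
}

def makeTableDefinition(cursor_description):
    table_def = TableDefinition()
    for column in cursor_description:  # DB-API: (name, type_code, ...)
        table_def.addColumn(column[0], Type.UNICODE_STRING)  # simplistic: all strings
    return table_def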
def to_tde(dataset, tde_mode, tde_name):
    if tde_mode == 'new':
        # Step 1: Create the Extract file
        dataExtract = Extract(tde_name)
        if dataExtract.hasTable('Extract'):
            print("tde already exists, use another name")
            return

        # Step 2: Create the table definition
        dataSchema = TableDefinition()
        dataSchema.addColumn('Station', Type.UNICODE_STRING)
        dataSchema.addColumn('Time', Type.UNICODE_STRING)
        dataSchema.addColumn('Date', Type.DATE)
        dataSchema.addColumn('all', Type.DOUBLE)
        dataSchema.addColumn('n7', Type.DOUBLE)
        dataSchema.addColumn('n8', Type.DOUBLE)
        dataSchema.addColumn('y_serries', Type.DOUBLE)

        # Step 3: Create a table in the image of the table definition
        table = dataExtract.addTable('Extract', dataSchema)

        # Step 4: Create rows and insert them one by one
        newRow = Row(dataSchema)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString(0, dataset['Station'][i])
            newRow.setString(1, dataset['Time'][i])
            newRow.setDate(2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day)
            newRow.setDouble(3, dataset['all'][i])
            newRow.setDouble(4, dataset['n7'][i])
            newRow.setDouble(5, dataset['n8'][i])
            newRow.setDouble(6, dataset['y_serries'][i])
            table.insert(newRow)

        # Step 5: Close the tde
        dataExtract.close()

    elif tde_mode == 'append':
        # Step 1: Open the existing Extract file
        preExtract = Extract(tde_name)

        # Step 2: Open the existing table
        table = preExtract.openTable('Extract')

        # Step 3: Get the table definition
        tableDef = table.getTableDefinition()

        # Step 4: Create rows and insert them one by one
        newRow = Row(tableDef)
        for i in range(0, len(dataset)):
            print(i)
            newRow.setString(0, dataset['Station'][i])
            newRow.setString(1, dataset['Time'][i])
            newRow.setDate(2, dataset['Date'][i].year, dataset['Date'][i].month, dataset['Date'][i].day)
            newRow.setDouble(3, dataset['all'][i])
            newRow.setDouble(4, dataset['n7'][i])
            newRow.setDouble(5, dataset['n8'][i])
            newRow.setDouble(6, dataset['y_serries'][i])
            table.insert(newRow)

        # Step 5: Close the extract
        preExtract.close()
def heatmapToTDE(heatmap, extent, smoothing, extractLocation):
    xLen = len(heatmap)
    yLen = len(heatmap[0])
    xRange = abs(extent[0] - extent[1])
    yRange = abs(extent[2] - extent[3])
    xMin = min(extent[0], extent[1])
    yMin = min(extent[2], extent[3])
    xIncrement = xRange / yLen
    yIncrement = yRange / xLen

    # 1. Initialize a new extract
    ExtractAPI.initialize()

    # 2. Create a table definition
    new_extract = Extract(extractLocation)

    # 3. Add column definitions to the table definition
    table_definition = TableDefinition()
    table_definition.addColumn('ROW', Type.UNICODE_STRING)  # column 0
    table_definition.addColumn('COL', Type.UNICODE_STRING)  # column 1
    table_definition.addColumn('VALUE', Type.DOUBLE)        # column 2
    table_definition.addColumn('ID', Type.UNICODE_STRING)
    table_definition.addColumn('CellCount', Type.INTEGER)
    table_definition.addColumn('Smoothing', Type.DOUBLE)
    table_definition.addColumn('GEOM', Type.SPATIAL)

    # 4. Initialize a new table in the extract
    # a. check if the table already exists
    # Only add the table if it doesn't already exist
    if (new_extract.hasTable('Extract') == False):
        new_table = new_extract.addTable('Extract', table_definition)
    else:
        new_table = new_extract.openTable('Extract')

    # 5. Create a new row
    new_row = Row(table_definition)  # Pass the table definition to the constructor

    # 6. Populate each new row
    yCoord = yMin
    for i in range(0, xLen):
        yCoord += yIncrement
        xCoord = xMin
        for j in range(0, yLen):
            xCoord += xIncrement
            cellWkt = getFourCorners([xCoord, yCoord], xIncrement, yIncrement)
            new_row.setString(0, str(i))                 # ROW
            new_row.setString(1, str(j))                 # COL
            new_row.setDouble(2, heatmap[i][j])          # VALUE
            new_row.setString(3, str(i) + '-' + str(j))  # ID
            new_row.setInteger(4, len(heatmap[0]))       # cell count
            new_row.setDouble(5, smoothing)              # smoothing
            new_row.setSpatial(6, cellWkt)               # WKT spatial
            new_table.insert(new_row)                    # Add the new row to the table

    # 7. Save the table and extract
    new_extract.close()

    # 8. Release the Extract API
    ExtractAPI.cleanup()
    return
from tableausdk import *
from tableausdk.Server import *
from tableausdk.Extract import *
import tableausdk.Extract as tde

# Define a new tde file.
tdefile = tde.Extract('test1.tde')

# Define a new data set / table definition in the tde.
tableDef = tde.TableDefinition()
tableDef.addColumn("company", 15)    # 15 = CHAR_STRING
tableDef.addColumn("projected", 10)  # 10 = DOUBLE
tableDef.addColumn("realRev", 10)    # 10 = DOUBLE

# Add the dataset to the file.
tabletran = tdefile.addTable("Extract", tableDef)

# Create a new row.
newrow = tde.Row(tableDef)

# Add data values; the first param is the column's ordinal position.
newrow.setCharString(0, 'myCompany')
newrow.setDouble(1, 1000)
newrow.setDouble(2, 888)

# Add the new row to the dataset.
tabletran.insert(newrow)

# Create a new row.
newrow = tde.Row(tableDef)
newrow.setCharString(0, 'myCompany2')
def new_tde(self):
    dataExtract = Extract(self.tde_name)
    if dataExtract.hasTable('Extract'):
        print("tde already exists, use another name")
        return
    return dataExtract
def build(self):
    # Set filename
    tdefileName = self.tde_settings_ins.tde_file
    if os.path.isfile(tdefileName):
        os.remove(tdefileName)

    # Set the extract object instance
    tdefile = tdeEx.Extract(tdefileName)

    # Create the column schema
    tableDef = tdeEx.TableDefinition()
    columnsListedInOrder = []
    outputColIndex = {}
    outputColNumber = 0
    for columnName, index in sorted(
            self.tde_settings_ins.columnIndex.items(), key=lambda x: x[1]):
        BasicDataType = self.tde_settings_ins.columns[columnName]
        tableDef.addColumn(columnName, tdeSettings.schemaIniTypeMap[BasicDataType])
        outputColIndex[columnName] = outputColNumber
        outputColNumber += 1

    # Add the column schema to the object instance of EXTRACT
    table = tdefile.addTable("Extract", tableDef)

    rowNo = 0
    rowsIterator = self.tde_settings_ins.inputInfo.fileInformation.yieldRowsBase()
    printedBool = False
    startTime = datetime.datetime.now()
    print "Build start time:", datetime.datetime.strftime(startTime, '%Y-%m-%d %H:%M:%S')
    prevPct = -1
    for myReaderRow in rowsIterator:
        putRow = tdeEx.Row(tableDef)
        for columnName, index in self.tde_settings_ins.columnIndex.items():
            # If there are no column headers, this needs to change to the index of the column
            if (tdeSettings.schemaIniTypeMap[self.tde_settings_ins.columns[columnName]] != tde.Type.UNICODE_STRING
                    and myReaderRow[index] == ""):
                putRow.setNull(outputColIndex[columnName])
            else:
                try:
                    # valueDecoded=myReaderRow[index].decode(self.tde_settings_ins.inputInfo.fileInformation.encodeing)
                    self.fieldSetterMap[tdeSettings.schemaIniTypeMap[
                        self.tde_settings_ins.columns[columnName]]](
                            putRow, outputColIndex[columnName], myReaderRow[index])
                except:
                    print "column name", columnName, "contains invalid data"
                    print "value: ", myReaderRow[index]
                    print sys.exc_info()[0]
                    raise
        table.insert(putRow)

        # Output progress line; only print each percent once as we reach it
        rowNo += 1
        pct = math.floor(self.tde_settings_ins.inputInfo.fileInformation.getPercent(rowNo))
        # print "row:", rowNo
        # print "pct:", pct
        # print "value: ", pct % 2
        if pct % 2 == 0:
            if printedBool == False or prevPct != pct:
                print "Percent:", round(pct, 0), "%, row:", rowNo
                printedBool = True
                prevPct = pct
        else:
            printedBool = False

    print "tde file built to:", tdefileName
    endTime = datetime.datetime.now()
    print "End Time:", endTime
    diff = endTime - startTime
    print "Build elapsed time:", diff
import threading
import time
import config_sql_bdi_call

# TAB_SDK_LOGDIR: the folder where the Tableau SDK writes log files.
# TAB_SDK_TMPDIR: the temporary directory where the SDK keeps intermediate
# (temporary) files, such as when it's creating an extract.
# os.environ['TAB_SDK_LOGDIR'] = 'G:\Scripts\ServerAdmin\Logs\Back_Dated_Inc'
# os.environ['TAB_SDK_TMPDIR'] = 'G:\Scripts\ServerAdmin\Logs\Back_Dated_Inc'

# Initialize the extract API process
ExtractAPI.initialize()

# Create new extract file
tdenew = Extract("./" + str(sys.argv[1] + "_histmonth.tde"))

print str(datetime.datetime.now()) + " begin time of sql"

# Connect to Greenplum
# cnxn = pyodbc.connect('DRIVER={DataDirect 7.1 Greenplum Wire Protocol};HOST=dtord01gpv01p.dc.dotomi.net;PORT=5432;DATABASE=vds_prd;UID=generic_sql_report_utility;PWD=DTMSqls3rv3r')
cnxn = pyodbc.connect("DSN=PostgreSQL-test")
# cnxn = pyodbc.connect('DRIVER={psqlodbca.so};HOST=dtord01gpv01p.dc.dotomi.net;PORT=5432;DATABASE=vds_prd;UID=generic_sql_report_utility;PWD=DTMSqls3rv3r')
cursor = cnxn.cursor()

# Get SQL
sqlrun = config_sql_bdi_call.execScriptsFromFile("config_" + str(sys.argv[1]) + ".sql")

# Execute SQL
cursor.execute(sqlrun)
# predict_date.append(pd.to_datetime('2012-01-01') + timedelta(i))
# #xp = preprocessing.scale(ml_data)
# #print dataw
# print ml_data
# df = pd.concat([data4_1, DataFrame(predict_date, columns=['predict value'], index=get_data_model(get_date_diff2(data4_1, t=4.1), model=4.1).index)], ignore_index=False, axis=1)

combined_data = pd.concat([a, b, c, d], axis=0, ignore_index=True)
# print combined_data

ExtractAPI.initialize()
if os.path.isfile("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde"):
    os.remove("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde")
createExtract = Extract("/Users/jlin/Desktop/data_stuff/predict_data_Extract.tde")

for i in combined_data.columns:
    if i in ['Contract Execute', 'Contract Sent to Site', 'Essential Doc', 'IRB', 'Ip Release']:
        combined_data[i] = pd.to_datetime(combined_data[i], coerce=True)
# combined_data = combined_data.drop(['Contract Execute','Contract Sent to Site','Essential Doc','IRB','Ip Release'], 1)

if createExtract.hasTable('Extract'):
    table = createExtract.openTable('Extract')
def extract(file_name):
    # Move the file to /extract and cd to /extract.
    # If there is no extract called TRACK_TERM then create one, otherwise append to TRACK_TERM.
    # Define the data model for the extract.
    global WORKING_DIRECTORY, TRACK_TERM
    if not os.path.exists(WORKING_DIRECTORY + '/extract/'):
        os.makedirs(WORKING_DIRECTORY + '/extract/')
    from_path = WORKING_DIRECTORY + '/' + file_name
    to_path = WORKING_DIRECTORY + '/extract/' + file_name
    os.rename(from_path, to_path)
    os.chdir(WORKING_DIRECTORY + '/extract')

    # Define the extract
    with data.Extract(TRACK_TERM + '.tde') as extract:
        tableDef = data.TableDefinition()
        # Define the columns and the data types in the extract
        tableDef.addColumn('lang', types.Type.CHAR_STRING)        # 0
        tableDef.addColumn('sentiment', types.Type.DOUBLE)        # 1
        tableDef.addColumn('country', types.Type.CHAR_STRING)     # 2
        tableDef.addColumn('created_at', types.Type.DATETIME)     # 3
        tableDef.addColumn('tweet_text', types.Type.CHAR_STRING)  # 4
        tableDef.addColumn('Longitude', types.Type.DOUBLE)        # 5
        tableDef.addColumn('source', types.Type.CHAR_STRING)      # 6
        tableDef.addColumn('user', types.Type.CHAR_STRING)        # 7
        tableDef.addColumn('Latitude', types.Type.DOUBLE)         # 8

        table = None
        if not extract.hasTable('Extract'):
            # Table does not exist, so create it.
            print "Creating a new extract"
            table = extract.addTable('Extract', tableDef)
        else:
            # Table exists, so append the new data.
            print "Appending to an existing extract"
            table = extract.openTable('Extract')

        new_row = data.Row(tableDef)

        # Read the data from the CSV into the extract row object
        with open(file_name, 'r') as inf:
            reader = csv.DictReader(inf, delimiter=',', lineterminator='\n')
            for row in reader:
                # Insert data into the row object in the correct order as defined above
                new_row.setCharString(0, row['lang'])
                sentiment = float(row['sentiment'])
                new_row.setDouble(1, sentiment)
                new_row.setCharString(2, row['country'])
                # Parse the twitter date string:
                #   Mon Sep 21 11:03:53 +0000 2015
                #   %a %b %d %H:%M:%S +0000 %Y
                date_object = datetime.strptime(row['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                year = int(datetime.strftime(date_object, '%Y'))
                month = int(datetime.strftime(date_object, '%m'))
                day = int(datetime.strftime(date_object, '%d'))
                hour = int(datetime.strftime(date_object, '%H'))
                min = int(datetime.strftime(date_object, '%M'))
                sec = int(datetime.strftime(date_object, '%S'))
                frac = 0  # fractions of a second, i.e. milliseconds
                new_row.setDateTime(3, year, month, day, hour, min, sec, frac)
                new_row.setCharString(4, row['tweet_text'])
                # Check if there is a value for longitude, otherwise write a 0
                try:
                    longitude = float(row['longitude'])
                except:
                    longitude = 0
                new_row.setDouble(5, longitude)
                new_row.setCharString(6, row['source'])
                new_row.setCharString(7, row['user'])
                # Check if there is a value for latitude, otherwise write a 0
                try:
                    latitude = float(row['latitude'])
                except:
                    latitude = 0
                new_row.setDouble(8, latitude)
                table.insert(new_row)

    # If the process fails we want to be able to re-run it without collisions between
    # file names, so give each file a unique name (unix time stamp in this case).
    os.rename(file_name, str(time.time()).split('.')[0] + '.csv')

    # cd back to the working directory
    os.chdir(WORKING_DIRECTORY)
    return
if arg_tde_filename:
    tde_filename = arg_tde_filename
else:
    tde_filename = csv_filename.replace(".csv", ".tde")
# print "Will write to [%s]" % os.path.join(tde_dirpath, tde_filename)
# print

# Step 1: Create the Extract file and open the csv
tde_fullpath = os.path.join(tde_dirpath, tde_filename)

# If the file doesn't exist, revoke the append
if arg_append and not os.path.isfile(tde_fullpath):
    print "Couldn't append -- file doesn't exist."
    arg_append = False
if not arg_append and os.path.isfile(tde_fullpath):
    os.remove(tde_fullpath)
new_extract = Extract(tde_fullpath)

# Define the columns by the first csv
table_df = pandas.read_csv(os.path.join(csv_dirpaths[0], csv_filename), names=arg_header)
for join_table_file in arg_join:
    join_df = pandas.read_csv(join_table_file)
    table_df = pandas.merge(table_df, join_df, how='left')

# Step 2: Create the tableDef
table_definition = TableDefinition()
old_colnames = []
colnames_to_types = {}
new_colnames_to_idx = {}
for (col, dtype) in itertools.izip(table_df.columns, table_df.dtypes):
    colname = col.strip()  # strip whitespace
def assemble_tde(self):
    """
    Gather the data information and create the tde.
    :return:
    """
    try:
        print "Creating extract:", self.file_name
        ExtractAPI.initialize()
        with Extract(self.file_name) as data_extract:
            # If the Extract table exists, add to it; otherwise create it first
            if data_extract.hasTable('Extract'):
                # Open an existing table to add more rows
                table = data_extract.openTable('Extract')
                table_definition = table.getTableDefinition()
            else:
                table_definition = TableDefinition()
                for each in self.data_types.reset_index(level=0).values:
                    # Add the column info to the table definition
                    table_definition.addColumn(str(each[0]), schema_type_map[str(each[1])])
                # Create the table with the table definition
                table = data_extract.addTable("Extract", table_definition)

            new_row = Row(table_definition)
            count = self.data_frame.shape[0]
            pbar = Bar(count)

            # Run through the dataframe and add its data to the table object
            for i in range(count):
                for j, column_name in enumerate(self.data_types.index.values.tolist()):
                    self.add_to_row(new_row, j,
                                    self.data_frame[column_name].iloc[i],
                                    self.data_types['data_type'][j],
                                    column_name)
                table.insert(new_row)
                self.last_data_row_extracted = self.data_frame.iloc[i].to_frame().transpose()
                # if i == 1:
                #     raise Exception
                pbar.passed()
            data_extract.close()
        ExtractAPI.cleanup()
    except:
        file_name = self.tde_file()
        # Clean up resources
        Extract(file_name).close()
        ExtractAPI.cleanup()
        # Create csv and pickle of the data
        self.data_frame.to_pickle(file_name.replace('.tde', '_pickle'))
        self.data_frame.to_csv(file_name.replace('.tde', '.csv'), index=False, encoding='utf-8')
        raise Exception(
            "Error in creating tde file please consult data files. \n%s\n%s. \n%s, %s, %s"
            % (file_name.replace('.tde', '_pickle'),
               file_name.replace('.tde', '.csv'),
               'Error on line {}'.format(sys.exc_info()[-1].tb_lineno),
               sys.exc_info()[0], sys.exc_info()[1]))
    tde_filename = arg_tde_filename
else:
    tde_filename = csv_filename.replace(".csv", ".tde")
# print "Will write to [%s]" % os.path.join(tde_dirpath, tde_filename)
# print

# Step 1: Create the Extract file and open the csv
tde_fullpath = os.path.join(tde_dirpath, tde_filename)

# If the file doesn't exist, revoke the append
if arg_append and not os.path.isfile(tde_fullpath):
    print "Couldn't append -- file doesn't exist."
    arg_append = False
if not arg_append and os.path.isfile(tde_fullpath):
    os.remove(tde_fullpath)
new_extract = Extract(tde_fullpath)

# Define the columns by the first csv
table_df = pandas.read_csv(os.path.join(csv_dirpaths[0], csv_filename), names=arg_header)
for join_table_file in arg_join:
    join_df = pandas.read_csv(join_table_file)
    table_df = pandas.merge(table_df, join_df, how='left')

# Step 2: Create the tableDef
table_definition = TableDefinition()
old_colnames = []
colnames_to_types = {}
new_colnames_to_idx = {}
for (col, dtype) in itertools.izip(table_df.columns, table_df.dtypes):
    colname = col.strip()  # strip whitespace
    old_colnames.append(colname)
from tableausdk import Extract as tde     # ** Module change from py2 for py3
from tableausdk import Types as tdeTypes  # **
import os

# Step 1: Create the Extract file
if os.path.isfile('trivialExample.tde'):
    os.remove('trivialExample.tde')
tdefile = tde.Extract('trivialExample.tde')

# Step 2: Create the tableDef
tableDef = tde.TableDefinition()
tableDef.addColumn('rowID', tdeTypes.Type.CHAR_STRING)  # Column index 0. **
tableDef.addColumn('value', tdeTypes.Type.DOUBLE)       # Column index 1. **

# Step 3: Create the table in the image of the tableDef
table = tdefile.addTable('Extract', tableDef)

# Step 4: Create some rows and insert them into the table
newrow = tde.Row(tableDef)
for i in range(1, 101):
    newrow.setCharString(0, 'Row ' + str(i))
    newrow.setDouble(1, i)
    table.insert(newrow)

# Step 5: Close the tde
tdefile.close()
def tableau_extract(resultset, data_file):
    """ Create TDE extract. """
    if isinstance(resultset, ResultSet):
        df_name = resultset.DataFrame()
    else:
        df_name = resultset

    data_type = []
    fieldnames = []
    data_type_map = {int: Type.INTEGER,
                     str: Type.UNICODE_STRING,
                     bool: Type.BOOLEAN,
                     bytearray: Type.CHAR_STRING,
                     list: Type.CHAR_STRING,
                     dict: Type.CHAR_STRING,
                     float: Type.DOUBLE,
                     decimal.Decimal: Type.DOUBLE,
                     datetime.date: Type.DATE,
                     datetime.time: Type.DURATION,
                     datetime.datetime: Type.DATETIME,
                     pd._libs.tslib.Timestamp: Type.DATETIME
                     }
    for col in df_name:
        fieldnames.append(col)
        data_type.append(df_name[col].apply(type).iat[0])
    data_dict = dict(zip(fieldnames, data_type))
    for col_name in data_dict:
        if data_dict[col_name] in data_type_map:
            data_dict[col_name] = data_type_map[data_dict[col_name]]
        else:
            data_dict[col_name] = Type.UNICODE_STRING

    # Initialize a new extract
    try:
        os.remove(data_file)
    except OSError:
        pass
    new_extract = Extract(data_file)
    table_definition = TableDefinition()
    for col_name in data_dict:
        table_definition.addColumn(col_name, data_dict[col_name])
    new_table = new_extract.addTable('Extract', table_definition)
    new_row = Row(table_definition)

    tde_types = {'INTEGER': 7, 'DOUBLE': 10, 'BOOLEAN': 11, 'DATE': 12,
                 'DATETIME': 13, 'DURATION': 14,
                 'CHAR_STRING': 15, 'UNICODE_STRING': 16}
    for i in range(0, len(df_name)):
        for col in range(0, table_definition.getColumnCount()):
            col_name = table_definition.getColumnName(col)
            try:
                if data_dict[col_name] == tde_types['INTEGER']:
                    new_row.setInteger(col, int(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DOUBLE']:
                    new_row.setDouble(col, float(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['BOOLEAN']:
                    new_row.setBoolean(col, bool(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['DATE']:
                    data = df_name[col_name][i]
                    new_row.setDate(col, data.year, data.month, data.day)
                elif data_dict[col_name] == tde_types['DATETIME']:
                    data = df_name[col_name][i]
                    new_row.setDateTime(col, data.year, data.month, data.day,
                                        data.hour, data.minute, data.second, 0)
                elif data_dict[col_name] == tde_types['DURATION']:
                    data = df_name[col_name][i]
                    new_row.setDuration(col, data.hour, data.minute, data.second, 0)
                elif data_dict[col_name] == tde_types['CHAR_STRING']:
                    new_row.setCharString(col, str(df_name[col_name][i]))
                elif data_dict[col_name] == tde_types['UNICODE_STRING']:
                    new_row.setString(col, str(df_name[col_name][i]))
                else:
                    print('Error')
                    new_row.setNull(col)
            except TypeError:
                new_row.setNull(col)
        new_table.insert(new_row)

    new_extract.close()
    ExtractAPI.cleanup()
    for file_name in glob.glob("DataExtract*.log"):
        os.remove(file_name)
        type_non_obj.append(i)

obj_dict = {x: None for x in type_obj}
non_obj_dict = {x: None for x in type_non_obj}
df = df.replace(obj_dict, 'na')
df = df.replace(non_obj_dict, 0)

# STEP 1 - Initialize the Extract API
ExtractAPI.initialize()

# STEP 2 - Initialize a new extract by calling the Extract() constructor
if os.path.isfile(tde_file):
    os.remove(tde_file)
td_extract = Extract(tde_file)

# STEP 3 - Create a table definition
table_definition = TableDefinition()

# STEP 4 - Create new column definitions
for i in range(len(df.columns)):
    if df.dtypes[i] == 'object':
        table_definition.addColumn(df.columns[i], Type.UNICODE_STRING)
    elif df.dtypes[i] == 'float64':
        table_definition.addColumn(df.columns[i], Type.DOUBLE)
    elif df.dtypes[i] == 'int64':
        table_definition.addColumn(df.columns[i], Type.INTEGER)
    elif df.dtypes[i] == 'datetime64[ns]':
        table_definition.addColumn(df.columns[i], Type.DATE)
## import the libraries
import tableausdk.Extract as tde
import pandas as pd
import os

## bring in a sample Graduate School Admissions dataset
file_name = "http://www.ats.ucla.edu/stat/data/binary.csv"
df = pd.read_csv(file_name)
df.head()
df.shape

## create the extract name, but remove the extract if it already exists
fname = "example.tde"
try:
    tdefile = tde.Extract(fname)
except:
    os.system('del ' + fname)
    os.system('del DataExtract.log')
    tdefile = tde.Extract(fname)

## define the table definition
tableDef = tde.TableDefinition()

## create a list of column names and types
colnames = df.columns
coltypes = df.dtypes

## create a dict for the field maps
## Caveat: I am not including all of the possibilities below
fieldMap = {
    'float64': tde.Types.Type.DOUBLE,