def build_scenario(build_plan,
                   filter_on='ready',
                   connection='dataiku_workspace',
                   ref_table='referentialclient',
                   ref_project='DIReferential',
                   add_ecrm_context=True,
                   finish_on_client=None,
                   single_client=None):
    scenario = Scenario()
    if not isinstance(filter_on, list):
        filter_on = [filter_on]
    project_name = dataiku.default_project_key()
    project = dataiku.api_client().get_project(project_name)
    local_variables = project.get_variables()['local']
    env = local_variables['env']

    kut.display_message('reading client context referential')
    executor = SQLExecutor2(connection=connection)
    sql_query_referential_client = "SELECT * FROM " + '_'.join(
        [env, ref_project, ref_table])
    client_ref = executor.query_to_df(sql_query_referential_client)
    filter_query = ' & '.join(filter_on)
    client_ref = client_ref.query(filter_query) if filter_query else client_ref
    kut.display_message('Clients ready for automation: ' +
                        ', '.join(client_ref.clientName.unique()))

    kut.display_message('run configuration')
    print(build_plan)

    if not pd.isnull(finish_on_client):
        finish_client = client_ref[client_ref.clientName == finish_on_client]
        if len(finish_client) == 0:
            kut.display_message(
                'finish client not found in plan: ' + finish_on_client +
                ' -- is the client name valid?')
        other_clients = client_ref[client_ref.clientName != finish_on_client]
        client_ref = pd.concat([other_clients, finish_client],
                               ignore_index=True)

    success = []
    if single_client is not None:
        requested_client = client_ref[client_ref.clientName == single_client]
        if not len(requested_client):
            kut.display_message(
                'requested single client is not found, building all allowed clients')
        else:
            client_ref = requested_client

    for index, client_row in client_ref.iterrows():
        variables = set_client_context(client_row=client_row,
                                       add_ecrm_context=add_ecrm_context,
                                       connection=connection)
        client_name = variables['local']['clientName']
        kut.display_message('starting builds on ' + client_name)
        run_scenario(table_plan=build_plan, scenario=scenario)
        success.append(client_name)
        scenario.set_global_variables(successfullRun=success)
        print('done_________________' + client_name)
    return success
def asterDo():
    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    try:
        # output dataset
        outputTable = outputtableinfo(
            output_dataset.get_location_info()['info'], main_output_name,
            get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ Make sure connection setting is set to 'Read a database table'."""
            """ This plugin only supports Aster tables.""")

    # input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(
                inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname, dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ Make sure connection setting is set to 'Read a database table'."""
            """ This plugin only supports Aster tables.""")

    # actual query
    query = getFunctionsQuery(dss_function, inputTables, outputTable)
    print('\n'.join(query))
    executor = SQLExecutor2(dataset=input_dataset)
    if dss_function.get('dropIfExists', False):
        dropAllQuery = getDropOutputTableArgumentsStatements(
            dss_function.get('arguments', []))
        executor.query_to_df('END TRANSACTION;', dropAllQuery)

    executor.query_to_df("END TRANSACTION;", pre_queries=query)

    # write table schema
    nQuery = '''SELECT * FROM {} LIMIT (1);'''.format(outputTable.tablename)
    selectResult = executor.query_to_df(nQuery)
    output_schema = []
    for column in selectResult.columns:
        output_schema.append({"name": column, "type": "string"})
    output_dataset.write_schema(output_schema)
def get_sql_table(referential_name, project_key='DIReferential'):
    env = get_project_variables(scope='local')['env']
    if not project_key or project_key == 'self':
        project_key = dataiku.default_project_key()
    table_name = '_'.join([env, project_key, referential_name.lower()])
    query = 'SELECT * FROM ' + table_name
    executor = SQLExecutor2(connection='dataiku_workspace')
    return executor.query_to_df(query)
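# Minimal usage sketch (assumes the 'dataiku_workspace' connection and a
# 'referentialclient' table exist for the current environment): the helper
# returns the whole referential as a Pandas dataframe.
client_ref = get_sql_table('referentialClient')
print(client_ref.head())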
def add_ECRM_context(variables, connection='dataiku_workspace'):
    print('adding ecrm context')
    project_name = dataiku.default_project_key()
    project = dataiku.api_client().get_project(project_name)
    local_variables = project.get_variables()['local']
    env = local_variables['env']

    executor = SQLExecutor2(connection=connection)
    sql_query_client_ecrm = ("SELECT * FROM " + env +
                             "_DIReferential_referentialECRMOperation")
    client_ecrm = executor.query_to_df(sql_query_client_ecrm)
    ecrm_info = client_ecrm[client_ecrm.clientName ==
                            variables['local']['clientName']]
    print('found', len(ecrm_info), 'relevant entries')

    variables['local']['ecrmOperations'] = {}
    for i, operation_row in ecrm_info.iterrows():
        operation_dict = operation_row.to_dict()
        operation_type = operation_dict['operationType']
        del operation_dict['operationType']
        variables['local']['ecrmOperations'][operation_type] = operation_dict
    return variables
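# Hedged usage sketch: add_ECRM_context() expects the project-variables layout
# used above, i.e. a dict with a 'local' section holding 'clientName'.
# The client name below is a placeholder.
variables = {'local': {'clientName': 'someClient'}}
variables = add_ECRM_context(variables)
print(list(variables['local']['ecrmOperations'].keys()))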
def store_name_of_views(view_ids=None, silent_mode=True):
    env = nxauto.get_project_variables(scope='local')['env']
    table_name = '_'.join([env, 'DIReferential', 'referentialstorename'])
    query = 'SELECT * FROM ' + table_name
    connection = SQLExecutor2(connection='dataiku_workspace')
    store_referential = connection.query_to_df(query)
    store_referential = store_referential[['storeName', 'viewId']]
    store_referential = store_referential[~pd.isnull(store_referential.viewId)]
    if view_ids is not None:
        store_referential = kut.filter_on_values(df=store_referential,
                                                 value_list=view_ids,
                                                 column_name='viewId')
    store_referential['viewId'] = store_referential.viewId.astype(int)
    store_name_dic = store_referential.set_index('viewId')['storeName'].to_dict()
    if not silent_mode:
        print(store_referential)
        print(store_referential.dtypes)
        print(store_name_dic)
    return store_name_dic
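# Usage sketch (the view ids below are placeholders): the helper returns a
# {viewId: storeName} mapping that can be used to label analytics results
# keyed on viewId.
store_names = store_name_of_views(view_ids=[101, 102], silent_mode=False)
print(store_names)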
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
# Import the class that allows us to execute SQL on the Studio connections
from dataiku.core.sql import SQLExecutor2

# Read recipe inputs
sfo_prepared = dataiku.Dataset("sfo_prepared")
sfo_prepared_df = sfo_prepared.get_dataframe()

# We create an executor. We pass to it the dataset instance. This way, the
# executor knows which SQL database should be targeted
executor = SQLExecutor2(dataset=sfo_prepared)

# Get the 5 most frequent manufacturers by total landing count
# (over the whole period)
mf_manufacturers = executor.query_to_df(
    """
    select "Aircraft Manufacturer" as manufacturer,
           sum("Landing Count") as count
    from sfo_prepared
    group by "Aircraft Manufacturer"
    order by count desc
    limit 5
    """)

# The "query_to_df" method returns a Pandas dataframe that
# contains the manufacturers
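# A short follow-up sketch: since mf_manufacturers is a regular Pandas
# dataframe, it can be reused directly, e.g. to keep only the rows of the
# prepared dataframe belonging to the top-5 manufacturers (column names are
# assumed from the query above).
top_manufacturers = mf_manufacturers['manufacturer'].tolist()
top_landings_df = sfo_prepared_df[
    sfo_prepared_df['Aircraft Manufacturer'].isin(top_manufacturers)]
print(top_landings_df.head())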
columns_right = get_recipe_config()['columns_2']

#############################
# Original recipe
#############################

# Start the loop
joins = ['LEFT', 'RIGHT', 'INNER']
join_conds = []
for key in range(len(key_a)):
    join_cond = Expression()
    globals()['join_cond_' + str(key)] = join_cond.and_(
        Column(key_a[key], input_A_names[0]).eq_null_unsafe(
            Column(key_b[key], input_B_names[0])))
    join_conds.append(globals()['join_cond_' + str(key)])

for i in joins:
    query = SelectQuery()
    query.select_from(input_A_datasets[0], alias=input_A_names[0])
    for j in columns_left:
        query.select(Column(j, input_A_names[0]), alias=j)
    for k in columns_right:
        query.select(Column(k, input_B_names[0]), alias=k)
    query.join(input_B_datasets[0], i, join_conds,
               operatorBetweenConditions=operator, alias=input_B_names[0])
    globals()['sql_' + str(i)] = toSQL(query, input_A_datasets[0])

e = SQLExecutor2()
e.exec_recipe_fragment(output_A_datasets[0], sql_LEFT)
e.exec_recipe_fragment(output_B_datasets[0], sql_RIGHT)
e.exec_recipe_fragment(output_C_datasets[0], sql_INNER)
def asterDo():
    """
    Takes the parameters set by the user from the UI, creates the query,
    and then executes it.
    """
    # Formatting options.
    SEP_LENGTH = 80
    SEP = "=" * SEP_LENGTH

    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    # Connection properties.
    # TODO: Handle Input and Output Table connection properties.
    properties = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get('properties')
    autocommit = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get('autocommitMode')

    # SQL Executor.
    executor = SQLExecutor2(dataset=input_dataset)

    # Handle pre- and post-query additions.
    # Assume autocommit TERA mode by default.
    pre_query = None
    post_query = None

    print(SEP)
    if not autocommit:
        print("NOT AUTOCOMMIT MODE.")
        print("Assuming TERA mode.")
        pre_query = ["BEGIN TRANSACTION;"]
        post_query = ["END TRANSACTION;"]
        for prop in properties:
            if prop['name'] == 'TMODE':
                if prop['value'] == 'ANSI':
                    print("ANSI mode.")
                    pre_query = [";"]
                    post_query = ["COMMIT WORK;"]
    else:
        print("AUTOCOMMIT MODE.")
        print("No pre- and post-query.")
    print(SEP)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)
    pp = pprint.PrettyPrinter(indent=4)
    print(SEP)
    print('DSS Function:')
    pp.pprint(dss_function)
    print(SEP)

    # output dataset
    try:
        outputTable = outputtableinfo(
            output_dataset.get_location_info()['info'], main_output_name,
            get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ This plugin only supports Teradata tables.""")

    # input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(
                inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname, dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ This plugin only supports Teradata tables.""")

    # Handle dropping of output tables.
    if dss_function.get('dropIfExists', False):
        print("Preparing to drop tables.")
        drop_query = dropTableStatement(outputTable)
        print(SEP)
        print("DROP query:")
        print(drop_query)
        print(SEP)
        try:
            # dataiku's query_to_df pre_queries parameter seems to not work.
            # This is a work-around to ensure that the "BEGIN TRANSACTION;"
            # block applies for non-autocommit TERA mode connections.
            if not autocommit:
                executor.query_to_df(pre_query)
            executor.query_to_df(drop_query, post_queries=post_query)
        except Exception as e:
            print(e)

        # Drop other output tables if they exist.
        drop_all_query = getDropOutputTableArgumentsStatements(
            dss_function.get('output_tables', []))
        print(SEP)
        print('Drop ALL Query:')
        print(drop_all_query)
        print(SEP)
        for drop_q in drop_all_query:
            # Same transaction work-around as above.
            if not autocommit:
                executor.query_to_df(pre_query)
            executor.query_to_df(drop_q, post_queries=post_query)

    # CREATE query.
    create_query = getFunctionsQuery(dss_function, inputTables, outputTable,
                                     get_recipe_config() or {})
    print(SEP)
    print("CREATE query:")
    print(create_query)
    print(SEP)

    # Detect error
    try:
        # Same transaction work-around as above.
        if not autocommit:
            executor.query_to_df(pre_query)
        executor.query_to_df(create_query, post_queries=post_query)
    except Exception as error:
        err_str = str(error)
        err_str_list = err_str.split(" ")
        # trying to shorten the error for the modal in front-end
        if len(err_str_list) > 15:
            print(SEP)
            print(error)
            print(SEP)
            new_err_str = err_str_list[:15]
            new_err_str.append("\n\n")
            new_err_str.append("...")
            new_err_str = " ".join(new_err_str)
            raise RuntimeError(new_err_str)
        else:
            raise RuntimeError(err_str)

    print('Moving results to output...')
    select_sample_query = 'SELECT * from ' + outputTable.tablename + ' SAMPLE 0'
    # Same transaction work-around as above.
    if not autocommit:
        executor.query_to_df(pre_query)
    sel_res = executor.query_to_df(select_sample_query, post_queries=post_query)
    pythonrecipe_out = output_dataset
    pythonrecipe_out.write_schema_from_dataframe(sel_res)

    # Additional Tables
    outtables = dss_function.get('output_tables', [])
    if outtables != []:
        tableCounter = 1
        print('Working on output tables')
        print(get_output_names_for_role('main'))
        print(outtables)
        for table in outtables:
            if table.get('value') != '' and table.get('value') is not None:
                print('Table')
                print(table)
                # Need to change this to max of split in order for multiple
                # database or no-database table inputs.
                # Change below might fix issue 4 of Jan 4 2018 for Nico.
                # For non-drop tables.
                try:
                    main_output_name2 = list(
                        filter(
                            lambda out_dataset: out_dataset.split('.')[-1] ==
                            table.get('value').split('.')[-1].strip('"'),
                            get_output_names_for_role('main')))[0]
                except Exception as error:
                    raise RuntimeError(
                        'Unable to find an output dataset for ' +
                        table.get('value') +
                        '. It may not exist as an output Dataset: ' +
                        table.get('value') + "\nRuntime Error: " + str(error))
                print('Output name 2')
                print(main_output_name2)
                output_dataset2 = dataiku.Dataset(main_output_name2)

                tableNamePrefix = output_dataset2.get_location_info(
                    sensitive_info=True)['info'].get('connectionParams').get(
                        'namingRule').get('tableNameDatasetNamePrefix')
                if tableNamePrefix is not None and tableNamePrefix != '':
                    print('Table prefix is not empty: ' + tableNamePrefix)

                customOutputTableSQL = ('SELECT * from ' + table.get('value') +
                                        ' SAMPLE 0')
                print('Working on table number:')
                print(tableCounter)
                print(customOutputTableSQL)

                # Same transaction work-around as above.
                if not autocommit:
                    executor.query_to_df(pre_query)
                sel_res = executor.query_to_df(customOutputTableSQL,
                                               post_queries=post_query)
                tableCounter += 1
                pythonrecipe_out2 = output_dataset2
                pythonrecipe_out2.write_schema_from_dataframe(sel_res)
    print('Complete!')
def __init__(self, dataset):
    self.dataset = dataset
    self.is_hdfs = 'hiveTableName' in dataset.get_config().get('params').keys()
    self.executor = HiveExecutor(dataset=dataset) if self.is_hdfs \
        else SQLExecutor2(dataset=dataset)
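# Hypothetical wrapper around the __init__ above (the class name and the
# query_to_df passthrough are assumptions for illustration): both HiveExecutor
# and SQLExecutor2 expose query_to_df, so callers never need to know which
# backend was picked.
class DatasetQueryRunner(object):
    def __init__(self, dataset):
        self.dataset = dataset
        self.is_hdfs = 'hiveTableName' in dataset.get_config().get('params').keys()
        self.executor = HiveExecutor(dataset=dataset) if self.is_hdfs \
            else SQLExecutor2(dataset=dataset)

    def query_to_df(self, query):
        # Delegate to whichever executor was selected.
        return self.executor.query_to_df(query)

# e.g. DatasetQueryRunner(dataiku.Dataset("sfo_prepared")).query_to_df("SELECT 1")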
"date": date_columns, "boolean": bool_columns } for col in dataset.read_schema(): dispatch.get(col["type"], str_columns).append(col["name"]) #============================================================================== # RUN #============================================================================== logging.info("DSS features: %s" % dss_settings["features"]) is_hivelike = False if dataset_config["type"] in SUPPORTED_DB: q = '"' sqlexec = SQLExecutor2(dataset=dataset) logging.info("Dataset config: %s" % dataset_config) #table = dataset_config["params"].get("table", dataset.short_name) table = '_'.join(dataset.name.split('.')) elif dataset_config["type"] == "HDFS": q = '`' if use_impala and compute_distinct: raise ValueError("Cannot compute distinct values on Impala") if "IMPALA" in dss_settings["features"] and use_impala: sqlexec = ImpalaExecutor(dataset=dataset) else: sqlexec = HiveExecutor(dataset=dataset) is_hivelike = True table = dataset.short_name else: raise Exception("Unsupported input dataset type: %s" %
def _execute(self, table, output_dataset):
    query = toSQL(table, dataset=output_dataset)
    logger.info(f"Executing query:\n{query}")

    sql_executor = SQLExecutor2(dataset=output_dataset)
    sql_executor.exec_recipe_fragment(output_dataset, query)
    logger.info("Done executing query !")
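# Standalone sketch of the same pattern (dataset and table names are assumed):
# exec_recipe_fragment() runs the SELECT in-database and writes the result into
# the output dataset, which avoids pulling a dataframe through query_to_df()
# and writing it back manually.
output_dataset = dataiku.Dataset("my_output")                # assumed dataset name
sql = 'SELECT * FROM "my_input_table" WHERE "amount" > 0'    # assumed table/column
SQLExecutor2(dataset=output_dataset).exec_recipe_fragment(output_dataset, sql)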
def asterDo():
    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # print('Connection info:')
    # print(input_dataset.get_location_info(sensitive_info=True)['info'])
    properties = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get('properties')
    autocommit = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get('autocommitMode')

    requiresTransactions = True
    print('Assuming TERA mode by default')
    if autocommit:
        print('Assuming autocommit TERA mode')
        requiresTransactions = False
        tmode = 'TERA'
        stTxn = ';'
        edTxn = ';'
    else:
        # Detected TERA
        print('Assuming non-autocommit TERA mode')
        tmode = 'TERA'
        stTxn = 'BEGIN TRANSACTION;'
        edTxn = 'END TRANSACTION;'

    for prop in properties:
        if prop['name'] == 'TMODE' and prop['value'] == 'TERA':
            if autocommit:
                print('I am in autocommit TERA mode')
                requiresTransactions = False
                tmode = 'TERA'
                stTxn = ';'
                edTxn = ';'
            else:
                # Detected TERA
                print('I am in TERA MODE')
                tmode = 'TERA'
                stTxn = 'BEGIN TRANSACTION;'
                edTxn = 'END TRANSACTION;'
        elif prop['name'] == 'TMODE' and prop['value'] == 'ANSI':
            # Detected ANSI
            print('I am in ANSI MODE')
            tmode = 'ANSI'
            stTxn = ';'
            edTxn = 'COMMIT WORK;'
    print(properties)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)
    print("\n=================================\n")
    print('Showing DSS Function')
    print(dss_function)
    print("\n=================================\n")

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    try:
        # output dataset
        outputTable = outputtableinfo(output_dataset.get_location_info()['info'],
                                      main_output_name,
                                      get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ This plugin only supports Teradata tables.""")

    # input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname, dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ This plugin only supports Teradata tables.""")

    # actual query
    query = getFunctionsQuery(dss_function, inputTables, outputTable,
                              get_recipe_config() or {})

    executor = SQLExecutor2(dataset=input_dataset)

    if dss_function.get('dropIfExists', False):
        # Drop the main output table.
        dropTableStr = dropTableStatement(outputTable)
        if requiresTransactions:
            print('Start transaction for drop')
            print(stTxn)
            executor.query_to_df(stTxn)
            print('End transaction for drop')
            print(edTxn)
            executor.query_to_df(edTxn, [dropTableStr])
        else:
            executor.query_to_df([dropTableStr])

        # Drop the additional output tables, one DROP statement at a time.
        dropAllQuery = getDropOutputTableArgumentsStatements(
            dss_function.get('output_tables', []))
        print('Drop ALL Query:')
        print(dropAllQuery)
        if requiresTransactions:
            for dropTblStmt in dropAllQuery:
                print('Start transaction for drop')
                print(stTxn)
                executor.query_to_df(stTxn)
                print('End transaction for drop')
                print(edTxn)
                executor.query_to_df(edTxn, [dropTblStmt])
        else:
            for dropTblStmt in dropAllQuery:
                executor.query_to_df([dropTblStmt])

    print('\n'.join(query))

    if requiresTransactions:
        print('Start transaction for create table')
        print(stTxn)
        executor.query_to_df(stTxn)

    # Detect error
    try:
        selectResult = executor.query_to_df(query)
    except Exception as error:
        err_str = str(error)
        err_str_list = err_str.split(" ")
        # trying to shorten the error for the modal in front-end
        if len(err_str_list) > 15:
            print("\n=================================\n")
            print(error)
            print("\n=================================\n")
            new_err_str = err_str_list[:15]
            new_err_str.append("\n\n")
            new_err_str.append("...")
            new_err_str = " ".join(new_err_str)
            raise RuntimeError(new_err_str)
        else:
            raise RuntimeError(err_str)

    if requiresTransactions:
        print('End transaction for create table')
        print(edTxn)
        executor.query_to_df(edTxn)

    print('Moving results to output...')
    if requiresTransactions:
        print('Start transaction for schema building')
        print(stTxn)
        executor.query_to_df(stTxn)

    customOutputTableSQL = 'SELECT * from ' + outputTable.tablename + ' SAMPLE 0'
    selRes = executor.query_to_df(customOutputTableSQL)
    pythonrecipe_out = output_dataset
    pythonrecipe_out.write_schema_from_dataframe(selRes)

    # Additional Tables
    outtables = dss_function.get('output_tables', [])
    if outtables != []:
        tableCounter = 1
        print('Working on output tables')
        print(get_output_names_for_role('main'))
        print(outtables)
        for table in outtables:
            if table.get('value') != '' and table.get('value') is not None:
                print('Table')
                print(table)
                # Need to change this to max of split in order for multiple
                # database or no-database table inputs.
                # Change below might fix issue 4 of Jan 4 2018 for Nico.
                # For non-drop tables.
                try:
                    main_output_name2 = list(
                        filter(
                            lambda out_dataset: out_dataset.split('.')[-1] ==
                            table.get('value').split('.')[-1].strip('"'),
                            get_output_names_for_role('main')))[0]
                except Exception as error:
                    raise RuntimeError(
                        'Unable to find an output dataset for ' +
                        table.get('value') +
                        '. It may not exist as an output Dataset: ' +
                        table.get('value') + "\nRuntime Error: " + str(error))
                print('Output name 2')
                print(main_output_name2)
                output_dataset2 = dataiku.Dataset(main_output_name2)

                tableNamePrefix = output_dataset2.get_location_info(
                    sensitive_info=True)['info'].get('connectionParams').get(
                        'namingRule').get('tableNameDatasetNamePrefix')
                if tableNamePrefix is not None and tableNamePrefix != '':
                    print('Table prefix is not empty: ' + tableNamePrefix)

                customOutputTableSQL = 'SELECT * from ' + table.get('value') + ' SAMPLE 0'
                print('Working on table number:')
                print(tableCounter)
                print(customOutputTableSQL)
                selRes = executor.query_to_df(customOutputTableSQL)
                tableCounter += 1
                pythonrecipe_out2 = output_dataset2
                pythonrecipe_out2.write_schema_from_dataframe(selRes)

    if requiresTransactions:
        print('End transaction for schema building')
        print(edTxn)
        executor.query_to_df(edTxn)
    print('Complete!')
sf_output = dataiku.Dataset(get_output_names_for_role('sf_output')[0])
sf_output_config = sf_output.get_config()

sf_stage_name = get_stage_name(get_recipe_config(), get_plugin_config())
sf_table_name = get_table_name(sf_output_config)
sf_connection_name = get_connection_name(sf_output_config)

# When creating parquet, we want to prefix with "part". But when importing parquet, we don't care.
sf_location = get_hdfs_location(hdfs_input_config, sf_stage_name)

logger.info(f'SF Stage Location: {sf_location}')
logger.info(f'SF Stage: {sf_stage_name}')
logger.info(f'SF Table: {sf_table_name}')
logger.info(f'SF Connection Name: {sf_connection_name}')

# write the schema for our Snowflake table
sf_output.write_schema(hdfs_input.read_schema(), dropAndCreate=True)

# open a writer and close it so that it'll force DSS to create the table.
with sf_output.get_writer():
    pass

sql = get_hdfs_to_snowflake_query(sf_location, sf_table_name, hdfs_input.read_schema())
logger.info(f'SF Query: {sql}')

executor = SQLExecutor2(connection=sf_connection_name)
results = executor.query_to_df(sql, post_queries=['COMMIT'])
logger.info(f'COPY results: {results.to_string()}')
            # Detected TERA
            print('I am in TERA MODE')
            tmode = 'TERA'
            stTxn = 'BEGIN TRANSACTION;'
            edTxn = 'END TRANSACTION;'
        elif prop['name'] == 'TMODE' and prop['value'] == 'ANSI':
            # Detected ANSI
            print('I am in ANSI MODE')
            tmode = 'ANSI'
            stTxn = ';'
            edTxn = 'COMMIT WORK;'

empty_table = input_A_datasets[0]

# SQL Executor
executor = SQLExecutor2(dataset=empty_table)
defaultDB = default_database()
if default_database(input_A_datasets[0]) != default_database():
    raise RuntimeError(
        'Input dataset and output dataset have different connection details')

output_location = output_A_datasets[0].get_location_info()['info']

print('Getting Function Config')
scriptAlias = function_config.get('script_alias', '')
scriptFileName = function_config.get('script_filename', '')
scriptLocation = function_config.get('script_location', '')
searchPath = default_database()
outputTable = output_A_datasets[0].get_location_info()['info'].get('table', '')
partitionOrHash = function_config.get('partitionOrHash', '')