import os
import time

from snowflake.ingest import SimpleIngestManager, StagedFile


def test_simple_ingest(connection_ctx, test_util):
    param = connection_ctx['param']
    pipe_name = '{}.{}.TEST_SIMPLE_INGEST_PIPE'.format(param['database'],
                                                       param['schema'])
    private_key = test_util.read_private_key()
    print(private_key)
    cur = connection_ctx['cnx'].cursor()
    test_file = os.path.join(test_util.get_data_dir(), 'test_file.csv')
    cur.execute(
        'create or replace table TEST_SIMPLE_INGEST_TABLE(c1 number, c2 string)'
    )
    cur.execute('create or replace stage TEST_SIMPLE_INGEST_STAGE')
    cur.execute('put file://{} @TEST_SIMPLE_INGEST_STAGE'.format(test_file))
    cur.execute(
        'create or replace pipe {0} as copy into TEST_SIMPLE_INGEST_TABLE '
        'from @TEST_SIMPLE_INGEST_STAGE'.format(pipe_name))

    ingest_manager = SimpleIngestManager(account=param['account'],
                                         user=param['user'],
                                         private_key=private_key,
                                         pipe=pipe_name,
                                         scheme=param['protocol'],
                                         host=param['host'],
                                         port=param['port'])

    # PUT gzips the file on upload, hence the .gz suffix in the stage
    staged_files = [StagedFile('test_file.csv.gz', None)]
    resp = ingest_manager.ingest_files(staged_files)
    assert resp['responseCode'] == 'SUCCESS'

    # Poll the load history for up to two minutes
    start_polling_time = time.time()
    while time.time() - start_polling_time < 120:
        history_resp = ingest_manager.get_history()
        if len(history_resp['files']) == 1:
            assert history_resp['files'][0]['path'] == 'test_file.csv.gz'
            return
        else:
            # wait for 20 seconds between polls
            time.sleep(20)
    assert False, 'ingest did not appear in the load history within 120 seconds'
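# The test above relies on key-pair authentication: SimpleIngestManager signs
# a JWT with a private key whose public half is registered on the Snowflake
# user. A minimal sketch (an assumption, not part of the test suite) of the
# kind of PEM key that test_util.read_private_key() is presumed to return:
import base64

from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.primitives.serialization import (
    Encoding, PrivateFormat, PublicFormat, NoEncryption)

key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
private_key_pem = key.private_bytes(
    Encoding.PEM, PrivateFormat.PKCS8, NoEncryption()).decode()

# The base64-encoded DER public key is what gets registered on the user, e.g.:
#   alter user ingest_user set rsa_public_key='<public_key_b64>';
public_key_b64 = base64.b64encode(
    key.public_key().public_bytes(
        Encoding.DER, PublicFormat.SubjectPublicKeyInfo)).decode()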
import json
import logging
import uuid
from datetime import datetime
from typing import Iterable, Optional

from snowflake.ingest import SimpleIngestManager, StagedFile

# 'Config' is a project-specific settings object defined elsewhere in this
# codebase; it is expected to expose account, url, user and private_key_pem.


class SnowpipeApi:
    def __init__(self, config: Config, pipe: str) -> None:
        self.logger = logging.getLogger(self.__class__.__name__)
        self.config = config
        self.ingest_manager = SimpleIngestManager(
            account=self.config.account,
            host=self.config.url,
            user=self.config.user,
            private_key=self.config.private_key_pem,
            pipe=pipe,
        )

    def report(self, recent_seconds: Optional[int] = None) -> None:
        """Print the recent load history for the pipe."""
        request_id = uuid.uuid4()
        self.logger.debug(f'request_id: {request_id}')
        body = self.ingest_manager.get_history(recent_seconds=recent_seconds,
                                               request_id=request_id)
        print(json.dumps(body, indent=4))

    def history(self, start_time: datetime, end_time: Optional[datetime] = None) -> None:
        """Print the load history for a given time range."""
        if not start_time:
            raise ValueError('start_time must be defined')
        request_id = uuid.uuid4()
        self.logger.debug(f'request_id: {request_id}')
        body = self.ingest_manager.get_history_range(
            start_time_inclusive=start_time.isoformat(),
            end_time_exclusive=end_time.isoformat() if end_time else None,
            request_id=request_id)
        print(json.dumps(body, indent=4))

    def ingest(self, files: Iterable[str]) -> None:
        """Submit a list of staged file names to the pipe."""
        if not files:
            raise ValueError('files must be defined')
        request_id = uuid.uuid4()
        self.logger.debug(f'request_id: {request_id}')
        staged_files = [StagedFile(name, None) for name in files]
        body = self.ingest_manager.ingest_files(
            staged_files=staged_files,
            request_id=request_id,
        )
        print(json.dumps(body, indent=4))
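# A hypothetical usage sketch for SnowpipeApi. The Config fields shown mirror
# the attributes the class reads (account, url, user, private_key_pem), but
# the concrete Config constructor and the file/pipe names are assumptions.
config = Config(
    account='myaccount',
    url='myaccount.snowflakecomputing.com',
    user='ingest_user',
    private_key_pem=open('rsa_key.p8').read(),
)
api = SnowpipeApi(config, pipe='MYDB.MYSCHEMA.MYPIPE')
api.ingest(['data/2024/01/part-0001.csv.gz'])  # files already PUT/staged
api.report(recent_seconds=3600)                # history for the last hour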
# Script fragment: assumes ingest_manager, staged_file_list and logger are
# defined earlier in the script (see the setup sketch below).
print(staged_file_list)

try:
    resp = ingest_manager.ingest_files(staged_file_list)
    print(resp)
except HTTPError as e:
    # HTTP error, may need to retry
    logger.error(e)
    sys.exit(1)

# 'SUCCESS' means Snowflake has received the files and will start loading
assert resp['responseCode'] == 'SUCCESS'

# Results take a while to appear in the load history
while True:
    history_resp = ingest_manager.get_history()
    if len(history_resp['files']) > 0:
        print('Ingest Report:\n')
        print(history_resp)
        break
    else:
        # wait for 20 seconds between polls
        time.sleep(20)

# Scan the load history for the past hour
hour = timedelta(hours=1)
date = datetime.datetime.utcnow() - hour
history_range_resp = ingest_manager.get_history_range(date.isoformat() + 'Z')
print('\nHistory scan report: \n')
print(history_range_resp)
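# A minimal sketch of the preamble the fragment above presumes: imports, a
# logger, the key, the SimpleIngestManager and the staged file list. The
# account, user, pipe and file names are placeholders, not from the original.
import sys
import time
import datetime
import logging
from datetime import timedelta

from requests import HTTPError
from snowflake.ingest import SimpleIngestManager, StagedFile

logger = logging.getLogger(__name__)

with open('rsa_key.p8') as f:          # assumed path to a PKCS#8 PEM key
    private_key = f.read()

ingest_manager = SimpleIngestManager(
    account='myaccount',               # placeholder account locator
    host='myaccount.snowflakecomputing.com',
    user='ingest_user',                # placeholder user
    pipe='MYDB.MYSCHEMA.MYPIPE',       # placeholder fully qualified pipe name
    private_key=private_key)

# Files already PUT into the stage referenced by the pipe definition
staged_file_list = [StagedFile('a.csv.gz', None), StagedFile('b.csv.gz', None)]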
# Method excerpt; assumes module-level imports of sys, time,
# requests.HTTPError, snowflake.connector.errors.ProgrammingError,
# snowflake.ingest.{SimpleIngestManager, StagedFile} and the cryptography
# serialization helpers (load_pem_private_key, Encoding, PrivateFormat,
# NoEncryption, default_backend).
def load_via_snowpipe(self, s3_key, stream):
    """Perform data transfer from the stage to Snowflake using Snowpipe."""

    def _generate_pipe_name(dbname, schema_table_name):
        stripped_db_name = dbname.replace('"', '')
        stripped_table_name = schema_table_name.replace('"', '')
        return f"{stripped_db_name}.{stripped_table_name}_s3_pipe"

    def _generate_pipe_args(pipe_name, schema_table_name, columns_with_trans):
        pipe_args = dict(
            pipe_name=pipe_name,
            db_name=self.connection_config['dbname'],
            obj_name=schema_table_name,
            stage=self.connection_config['stage'],
            file_format=self.connection_config['file_format'],
            cols=', '.join([c['name'] for c in columns_with_trans]),
        )
        return pipe_args

    def _load_private_key():
        # connection_config is a dict, so use .get() rather than getattr();
        # load_pem_private_key expects the password as bytes or None
        key_path = self.connection_config.get('private_key_path', '/rsa_key.p8')
        password = self.connection_config.get('private_key_password')
        with open(key_path, 'rb') as pem_in:
            private_key_obj = load_pem_private_key(
                pem_in.read(),
                password=password.encode() if password else None,
                backend=default_backend())
        private_key_text = private_key_obj.private_bytes(
            Encoding.PEM, PrivateFormat.PKCS8, NoEncryption()).decode('utf-8')
        return private_key_text

    def _increment_value(exponentially=False):
        # Fibonacci-driven wait times: 2**n seconds for exponential backoff,
        # n + 30 seconds for linear polling
        previous = 0
        current = 1
        while True:
            yield 2**(current + previous) if exponentially \
                else current + previous + 30
            current = current + previous
            previous = current - previous

    self.logger.info("Loading data using Snowpipe.")

    # Get the list of columns with types and transformations
    columns_with_trans = [{
        "name": safe_column_name(name),
        "trans": column_trans(schema)
    } for (name, schema) in self.flatten_schema.items()]

    schema_table_name = self.table_name(stream, False)
    db_name = self.connection_config['dbname']
    pipe_name = _generate_pipe_name(db_name, schema_table_name)
    pipe_args = _generate_pipe_args(pipe_name, schema_table_name, columns_with_trans)

    create_pipe_sql = """create pipe {pipe_name}
                         as copy into {db_name}.{obj_name} ({cols})
                         from @{db_name}.{stage}
                         file_format = (format_name = {db_name}.{file_format});""".format(**pipe_args)
    drop_pipe_sql = f"drop pipe if exists {pipe_name};"

    # Create the snowpipe
    try:
        self.logger.debug("Creating snowpipe - %s.", pipe_name)
        # Primary key found in the records: raise a warning, since Snowpipe
        # only copies data and cannot consolidate records by key
        if len(self.stream_schema_message['key_properties']) > 0:
            self.logger.warning(
                "Primary key %s found in the data stream. Snowpipe cannot be used to "
                "consolidate records based on keys; it can only copy data. "
                "Please refer to the docs for further details",
                self.stream_schema_message['key_properties'])
        self.query(create_pipe_sql)
    except ProgrammingError as error:
        self.logger.error(
            "An error was encountered while creating the snowpipe, %s", error)

    # A private key is required to authenticate the Snowpipe data transfer
    private_key_text = _load_private_key()

    ingest_manager = SimpleIngestManager(
        account=self.connection_config['account'].split('.')[0],
        host=self.connection_config['account'] + '.snowflakecomputing.com',
        user=self.connection_config['user'],
        pipe=pipe_name,
        scheme='https',
        port=443,
        private_key=private_key_text)

    # The file to load, wrapped in the StagedFile class
    staged_file_list = [StagedFile(s3_key, None)]

    # Ingest files using Snowpipe, retrying with exponential backoff
    retries = self.connection_config.get('max_retry', 5)
    wait_time = _increment_value(exponentially=True)
    while True:
        try:
            self.logger.debug(
                "Starting to ingest file via snowpipe, retries left %s", retries)
            resp = ingest_manager.ingest_files(staged_file_list)
            self.logger.info(
                "Snowpipe has received the files and will now start loading: %s",
                resp['responseCode'])
            break
        except HTTPError as e:
            # HTTP error: wait and retry, exit if it still fails
            self.logger.error(e)
            time.sleep(next(wait_time))
            retries -= 1
            if not retries:
                self.logger.critical(
                    "Max retry limit reached, failed to load data using snowpipe")
                sys.exit(1)

    # Wait for the transfer to show up in the history, then drop the pipe
    wait_time = _increment_value()
    while True:
        history_resp = ingest_manager.get_history()
        if len(history_resp['files']) > 0:
            self.logger.info(
                '''Ingest Report for snowpipe : %s
                STATUS: %s
                rowsInserted(rowsParsed): %s(%s)''',
                history_resp['pipe'],
                history_resp['completeResult'],
                history_resp['files'][0]['rowsInserted'],
                history_resp['files'][0]['rowsParsed'])
            self.query(drop_pipe_sql)
            break
        else:
            self.logger.debug('waiting for snowpipe to transfer data...')
            time.sleep(next(wait_time))