Example #1
import os
import time

from snowflake.ingest import SimpleIngestManager, StagedFile


def test_simple_ingest(connection_ctx, test_util):
    param = connection_ctx['param']

    pipe_name = '{}.{}.TEST_SIMPLE_INGEST_PIPE'.format(param['database'],
                                                       param['schema'])

    private_key = test_util.read_private_key()

    print(private_key)

    cur = connection_ctx['cnx'].cursor()

    # Create the target table and stage, upload the test file, then create the pipe
    test_file = os.path.join(test_util.get_data_dir(), 'test_file.csv')
    cur.execute(
        'create or replace table TEST_SIMPLE_INGEST_TABLE(c1 number, c2 string)'
    )
    cur.execute('create or replace stage TEST_SIMPLE_INGEST_STAGE')
    cur.execute('put file://{} @TEST_SIMPLE_INGEST_STAGE'.format(test_file))
    cur.execute(
        'create or replace pipe {0} as copy into TEST_SIMPLE_INGEST_TABLE '
        'from @TEST_SIMPLE_INGEST_STAGE'.format(pipe_name))

    ingest_manager = SimpleIngestManager(account=param['account'],
                                         user=param['user'],
                                         private_key=private_key,
                                         pipe=pipe_name,
                                         scheme=param['protocol'],
                                         host=param['host'],
                                         port=param['port'])

    # PUT compresses the file by default, hence the .gz suffix on the staged name
    staged_files = [StagedFile('test_file.csv.gz', None)]

    resp = ingest_manager.ingest_files(staged_files)

    assert resp['responseCode'] == 'SUCCESS'

    # Poll the load history for up to two minutes
    start_polling_time = time.time()

    while time.time() - start_polling_time < 120:
        history_resp = ingest_manager.get_history()

        if len(history_resp['files']) == 1:
            assert history_resp['files'][0]['path'] == 'test_file.csv.gz'
            return
        else:
            # wait for 20 seconds
            time.sleep(20)

    assert False, 'file was not reported in the ingest history within 120 seconds'
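
A minimal sketch of what a key-loading helper such as test_util.read_private_key() might look like, assuming an unencrypted PKCS#8 key file on disk; the path, default file name and helper name here are illustrative, not part of the actual test fixtures:

# Hypothetical key loader; returns the PEM text SimpleIngestManager expects
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.serialization import (
    Encoding, NoEncryption, PrivateFormat, load_pem_private_key)


def read_private_key(path='rsa_key.p8', password=None):
    with open(path, 'rb') as pem_in:
        key = load_pem_private_key(pem_in.read(), password=password,
                                   backend=default_backend())
    # Re-serialize as an unencrypted PKCS#8 PEM string for key pair authentication
    return key.private_bytes(Encoding.PEM, PrivateFormat.PKCS8,
                             NoEncryption()).decode('utf-8')
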
Example #2
import json
import logging
import uuid
from datetime import datetime
from typing import Iterable, Optional

from snowflake.ingest import SimpleIngestManager, StagedFile

# Config is the application's own configuration type (not shown in this snippet)


class SnowpipeApi:
    def __init__(self, config: Config, pipe: str) -> None:
        self.logger = logging.getLogger(self.__class__.__name__)
        self.config = config
        self.ingest_manager = SimpleIngestManager(
            account=self.config.account,
            host=self.config.url,
            user=self.config.user,
            private_key=self.config.private_key_pem,
            pipe=pipe,
        )

    def report(self, recent_seconds: Optional[int] = None) -> None:
        request_id = uuid.uuid4()
        self.logger.debug(f'request_id: {request_id}')
        body = self.ingest_manager.get_history(recent_seconds=recent_seconds,
                                               request_id=request_id)
        print(json.dumps(body, indent=4))

    def history(self,
                start_time: datetime,
                end_time: Optional[datetime] = None) -> None:
        if not start_time:
            raise ValueError('start_time must be defined')
        request_id = uuid.uuid4()
        self.logger.debug(f'request_id: {request_id}')
        body = self.ingest_manager.get_history_range(
            start_time_inclusive=start_time.isoformat(),
            end_time_exclusive=end_time.isoformat() if end_time else None,
            request_id=request_id)
        print(json.dumps(body, indent=4))

    def ingest(self, files: Iterable[str]) -> None:
        if not files:
            raise ValueError('files must be defined')
        request_id = uuid.uuid4()
        self.logger.debug(f'request_id: {request_id}')
        staged_files = [StagedFile(name, None) for name in files]
        body = self.ingest_manager.ingest_files(
            staged_files=staged_files,
            request_id=request_id,
        )
        print(json.dumps(body, indent=4))
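
A usage sketch for the class above. The Config here is a stand-in dataclass with only the fields the constructor reads (account, url, user, private_key_pem); the account, pipe name, staged file path and key file are placeholder assumptions:

from dataclasses import dataclass
from datetime import datetime, timedelta, timezone


@dataclass
class Config:
    # Stand-in for the application's own Config type
    account: str
    url: str
    user: str
    private_key_pem: str


config = Config(account='my_account',
                url='my_account.snowflakecomputing.com',
                user='INGEST_USER',
                private_key_pem=open('rsa_key.p8').read())

api = SnowpipeApi(config, pipe='MY_DB.MY_SCHEMA.MY_PIPE')
api.ingest(['data/part-0001.csv.gz'])   # paths are relative to the pipe's stage
api.report(recent_seconds=600)          # loads reported in the last 10 minutes
api.history(start_time=datetime.now(timezone.utc) - timedelta(hours=1))
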
Example #3
# List of files in the stage specified in the pipe definition
file_list = ['a.csv', 'b.csv']
staged_file_list = []
for file_name in file_list:
    staged_file = StagedFile(file_name, None)
    staged_file_list.append(staged_file)
    print(staged_file)

try:
    resp = ingest_manager.ingest_files(staged_file_list)
    print(resp)
except HTTPError as e:
    # HTTP error, may need to retry
    logger.error(e)
    exit(1)

# This means Snowflake has received the file and will start loading it
assert resp['responseCode'] == 'SUCCESS'

# Need to wait a while for the result to appear in the load history
while True:
    history_resp = ingest_manager.get_history()

    if len(history_resp['files']) > 0:
        print('Ingest Report:\n')
        print(history_resp)
        break
    else:
        # wait for 20 seconds
        time.sleep(20)

# Scan the load history for the past hour
hour = timedelta(hours=1)
date = datetime.datetime.utcnow() - hour
history_range_resp = ingest_manager.get_history_range(date.isoformat() + 'Z')

print('\nHistory scan report: \n')
print(history_range_resp)
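
The snippet above assumes a logger and an ingest_manager already exist. A sketch of that setup, with placeholder account, host, user, pipe and key file; the private key is passed as PEM-encoded PKCS#8 text:

import datetime
import time
from datetime import timedelta
from logging import getLogger

from requests import HTTPError
from snowflake.ingest import SimpleIngestManager, StagedFile

logger = getLogger(__name__)

# Placeholder connection details
ingest_manager = SimpleIngestManager(account='my_account',
                                     host='my_account.snowflakecomputing.com',
                                     user='INGEST_USER',
                                     private_key=open('rsa_key.p8').read(),
                                     pipe='MY_DB.MY_SCHEMA.MY_PIPE')
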
Example #4
    def load_via_snowpipe(self, s3_key, stream):
        """ Performs data transfer from the stage to snowflake using snowpipe. """
        def _generate_pipe_name(dbname, schema_table_name):
            stripped_db_name = dbname.replace('"', '')
            stripped_table_name = schema_table_name.replace('"', '')
            return f"{stripped_db_name}.{stripped_table_name}_s3_pipe"

        def _generate_pipe_args(pipe_name, schema_table_name,
                                columns_with_trans):
            pipe_args = dict(
                pipe_name=pipe_name,
                db_name=self.connection_config['dbname'],
                obj_name=schema_table_name,
                stage=self.connection_config['stage'],
                file_format=self.connection_config['file_format'],
                cols=', '.join([c['name'] for c in columns_with_trans]),
            )
            return pipe_args

        def _load_private_key():
            # connection_config is a plain dict, so read optional settings with
            # .get() (getattr() would always fall back to the defaults here)
            key_path = self.connection_config.get("private_key_path",
                                                  "/rsa_key.p8")
            password = self.connection_config.get("private_key_password", None)
            with open(key_path, 'rb') as pem_in:
                private_key_obj = load_pem_private_key(
                    pem_in.read(),
                    # load_pem_private_key expects the passphrase as bytes
                    password=password.encode() if password else None,
                    backend=default_backend())

            private_key_text = private_key_obj.private_bytes(
                Encoding.PEM, PrivateFormat.PKCS8,
                NoEncryption()).decode('utf-8')
            return private_key_text

        def _increment_value(exponentially=False):
            # Fibonacci-based back-off: yields 2**fib(n) seconds when
            # exponential, otherwise fib(n) + 30 seconds
            previous = 0
            current = 1
            while True:
                yield 2**(current + previous) if exponentially  \
                    else current + previous + 30
                current = current + previous
                previous = current - previous

        self.logger.info("Loading data using Snowpipe.")
        # Get list of columns with types and transformations
        columns_with_trans = [{
            "name": safe_column_name(name),
            "trans": column_trans(schema)
        } for (name, schema) in self.flatten_schema.items()]
        schema_table_name = self.table_name(stream, False)
        db_name = self.connection_config['dbname']

        pipe_name = _generate_pipe_name(db_name, schema_table_name)
        pipe_args = _generate_pipe_args(pipe_name, schema_table_name,
                                        columns_with_trans)

        create_pipe_sql = """create pipe {pipe_name} as
                            copy into {db_name}.{obj_name} ({cols})
                            from @{db_name}.{stage}
                            file_format = (format_name = {db_name}.{file_format} );""".format(
            **pipe_args)
        drop_pipe_sql = f"drop pipe if exists {pipe_name};"

        # Create snowpipe
        try:
            self.logger.debug("Creating snowpipe - %s.", pipe_name)

            # Primary key found in the records: raise a warning
            if len(self.stream_schema_message['key_properties']) > 0:
                self.logger.warning(
                    "Primary key %s found in the data stream. Snowpipe cannot be used to "
                    "consolidate records based upon keys; it can only copy data. "
                    "Please refer to the docs for further details.",
                    self.stream_schema_message['key_properties'])

            # Create the pipe and let Snowpipe perform the copy
            self.query(create_pipe_sql)
        except ProgrammingError as error:
            self.logger.error(
                "An error was encountered while creating the snowpipe, %s",
                error)

        # Private key (key pair authentication) is required for the Snowpipe REST API
        private_key_text = _load_private_key()

        ingest_manager = SimpleIngestManager(
            account=self.connection_config['account'].split('.')[0],
            host=self.connection_config['account'] + '.snowflakecomputing.com',
            user=self.connection_config['user'],
            pipe=pipe_name,
            scheme='https',
            port=443,
            private_key=private_key_text)

        # List of files, wrapped into StagedFile objects
        staged_file_list = [StagedFile(s3_key, None)]

        # Ingest files using snowpipe
        retries = self.connection_config.get('max_retry', 5)
        wait_time = _increment_value(exponentially=True)
        while True:
            try:
                self.logger.debug(
                    "Starting to ingest file via snowpipe, retries left %s",
                    retries)
                resp = ingest_manager.ingest_files(staged_file_list)
                self.logger.info(
                    "Snowpipe has recived the files and will now start loading: %s",
                    resp['responseCode'])
                break
            except HTTPError as e:
                # HTTP error: wait and retry; exit if it still fails
                self.logger.error(e)
                time.sleep(next(wait_time))
                retries -= 1
                if not retries:
                    self.logger.critical(
                        "Max retry limit reached, failed to load data using snowpipe"
                    )
                    sys.exit(1)

        # Wait for the transfer to complete, then drop the pipe
        wait_time = _increment_value()
        while True:
            history_resp = ingest_manager.get_history()

            if len(history_resp['files']) > 0:
                self.logger.info(
                    '''Ingest Report for snowpipe : %s
                                    STATUS: %s
                                    rowsInserted(rowsParsed): %s(%s)''',
                    history_resp['pipe'], history_resp['completeResult'],
                    history_resp['files'][0]['rowsInserted'],
                    history_resp['files'][0]['rowsParsed'])
                self.query(drop_pipe_sql)
                break
            else:
                self.logger.debug('waiting for snowpipe to transfer data...')
                time.sleep(next(wait_time))
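
The wait_time generator is the least obvious piece of the retry logic: current and previous walk the Fibonacci sequence, so exponential mode yields 2 raised to successive Fibonacci numbers while the default mode yields the Fibonacci numbers plus a 30-second floor. A standalone check of the values it produces:

from itertools import islice


def _increment_value(exponentially=False):
    # Same generator as in load_via_snowpipe, lifted out for a quick check
    previous = 0
    current = 1
    while True:
        yield 2**(current + previous) if exponentially \
            else current + previous + 30
        current = current + previous
        previous = current - previous


print(list(islice(_increment_value(exponentially=True), 5)))  # [2, 4, 8, 32, 256]
print(list(islice(_increment_value(), 5)))                    # [31, 32, 33, 35, 38]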