def flush_records(stream: str,
                  records: List[Dict],
                  db_sync: DbSync,
                  temp_dir: str = None,
                  no_compression: bool = False) -> None:
    """
    Takes a list of record messages and loads them into the Snowflake target table

    Args:
        stream: Name of the stream
        records: List of dictionaries, one per CSV line. Dict key is the column name,
                 value is the column value
        db_sync: A DbSync object
        temp_dir: Directory where intermediate temporary files will be created.
                  (Default: OS specific temp directory)
        no_compression: Disable the use of compressed files. (Default: False)

    Returns:
        None
    """
    # Generate file on disk in the required format
    filepath = db_sync.file_format.formatter.records_to_file(records,
                                                             db_sync.flatten_schema,
                                                             compression=not no_compression,
                                                             dest_dir=temp_dir,
                                                             data_flattening_max_level=db_sync.data_flattening_max_level)

    # Get file stats
    row_count = len(records)
    size_bytes = os.path.getsize(filepath)

    # Upload to S3 and load into Snowflake
    s3_key = db_sync.put_to_stage(filepath, stream, row_count, temp_dir=temp_dir)
    db_sync.load_file(s3_key, row_count, size_bytes)

    # Delete file from local disk and from S3
    os.remove(filepath)
    db_sync.delete_from_stage(stream, s3_key)
def flush_records(stream: str,
                  records: List[Dict],
                  db_sync: DbSync,
                  temp_dir: str = None,
                  no_compression: bool = False,
                  archive_load_files: Dict = None) -> None:
    """
    Takes a list of record messages and loads them into the Snowflake target table

    Args:
        stream: Name of the stream
        records: List of dictionaries, one per CSV line. Dict key is the column name,
                 value is the column value
        db_sync: A DbSync object
        temp_dir: Directory where intermediate temporary files will be created.
                  (Default: OS specific temp directory)
        no_compression: Disable the use of compressed files. (Default: False)
        archive_load_files: Data needed for archiving load files. (Default: None)

    Returns:
        None
    """
    # Generate file on disk in the required format
    filepath = db_sync.file_format.formatter.records_to_file(records,
                                                             db_sync.flatten_schema,
                                                             compression=not no_compression,
                                                             dest_dir=temp_dir,
                                                             data_flattening_max_level=db_sync.data_flattening_max_level)

    # Get file stats
    row_count = len(records)
    size_bytes = os.path.getsize(filepath)

    # Upload to S3 and load into Snowflake
    s3_key = db_sync.put_to_stage(filepath, stream, row_count, temp_dir=temp_dir)
    db_sync.load_file(s3_key, row_count, size_bytes)

    # Delete file from local disk
    os.remove(filepath)

    if archive_load_files:
        stream_name_parts = stream_utils.stream_name_to_dict(stream)
        if 'schema_name' not in stream_name_parts or 'table_name' not in stream_name_parts:
            raise Exception("Failed to extract schema and table names from stream '{}'".format(stream))

        archive_schema = stream_name_parts['schema_name']
        archive_table = stream_name_parts['table_name']
        archive_tap = archive_load_files['tap']

        archive_metadata = {
            'tap': archive_tap,
            'schema': archive_schema,
            'table': archive_table,
            'archived-by': 'pipelinewise_target_snowflake'
        }

        if 'column' in archive_load_files:
            archive_metadata.update({
                'incremental-key': archive_load_files['column'],
                'incremental-key-min': str(archive_load_files['min']),
                'incremental-key-max': str(archive_load_files['max'])
            })

        # Use the same file name as in the import
        archive_file = os.path.basename(s3_key)
        archive_key = "{}/{}/{}".format(archive_tap, archive_table, archive_file)

        db_sync.copy_to_archive(s3_key, archive_key, archive_metadata)

    # Delete file from S3
    db_sync.delete_from_stage(stream, s3_key)
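# --- Illustrative usage sketch (not part of the module) ----------------------
# The dict shape below mirrors the keys the function above reads from
# `archive_load_files`: 'tap' is required, while 'column', 'min' and 'max'
# carry the optional incremental-key metadata. The stream name, records,
# temp_dir and db_sync values are hypothetical placeholders, not values
# taken from this project.
#
# flush_records(
#     stream='public-orders',
#     records=[{'id': 1, 'status': 'shipped'}],
#     db_sync=db_sync,                      # an already configured DbSync instance
#     temp_dir='/tmp/pipelinewise',
#     no_compression=False,
#     archive_load_files={
#         'tap': 'tap_mysql',
#         'column': 'updated_at',
#         'min': '2021-01-01T00:00:00',
#         'max': '2021-01-31T23:59:59',
#     },
# )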