def __init__(self, stream_name: str, partition_key: str, *, chunk_size: int = DEFAULT_CHUNK_SIZE,
             encoder: str = 'utf-8', workers: int = 1, **boto_session_kwargs):
    """
    :param stream_name: Name of the Kinesis stream
    :type stream_name: str
    :param partition_key: Kinesis partition key used to group data by shards
    :type partition_key: str
    :param chunk_size: the size of a chunk of records used as the rotation threshold (default 524288)
    :type chunk_size: int
    :param encoder: the encoder to be used for log records (default 'utf-8')
    :type encoder: str
    :param workers: the number of background workers that rotate log records (default 1)
    :type workers: int
    :param boto_session_kwargs: additional keyword arguments for the AWS Kinesis Resource
    :type boto_session_kwargs: boto3 resource keyword arguments
    """
    args_validation = (
        ValidationRule(stream_name, is_non_empty_string, empty_str_err('stream_name')),
        ValidationRule(chunk_size, is_positive_int, bad_integer_err('chunk_size')),
        ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
        ValidationRule(workers, is_positive_int, bad_integer_err('workers')),
    )

    for rule in args_validation:
        assert rule.func(rule.arg), rule.message

    self.stream = KinesisStream(stream_name, partition_key, chunk_size=chunk_size, encoder=encoder,
                                workers=workers, **boto_session_kwargs)

    # Make sure we gracefully clear the buffers and upload the missing parts before exiting
    signal.signal(signal.SIGTERM, self._teardown)
    signal.signal(signal.SIGINT, self._teardown)
    signal.signal(signal.SIGQUIT, self._teardown)
    atexit.register(self.close)

    StreamHandler.__init__(self, self.stream)
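
# A minimal usage sketch for the constructor above, assuming the enclosing class is a
# logging handler named KinesisHandler (the class name and import path are hypothetical,
# not taken from this file). Credentials and region are resolved through boto3's normal
# lookup chain or via **boto_session_kwargs.
if __name__ == '__main__':
    import logging
    from my_handlers import KinesisHandler  # hypothetical module; adjust to the real package

    logger = logging.getLogger('kinesis_example')
    logger.setLevel(logging.INFO)
    # 'app-logs' and 'service-a' are placeholder stream/partition values;
    # region_name is forwarded to boto3 through **boto_session_kwargs.
    logger.addHandler(KinesisHandler('app-logs', 'service-a', workers=1,
                                     region_name='us-east-1'))
    logger.info('hello from the Kinesis handler')
    logging.shutdown()  # flushes and closes the handler, draining any buffered records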
def __init__(self, key: str, bucket: str, *, chunk_size: int = DEFAULT_CHUNK_SIZE,
             time_rotation: int = DEFAULT_ROTATION_TIME_SECS, max_file_size_bytes: int = MAX_FILE_SIZE_BYTES,
             encoder: str = 'utf-8', max_threads: int = 1, compress: bool = False, **boto_session_kwargs):
    """
    :param key: The path of the S3 object
    :type key: str
    :param bucket: The id of the S3 bucket
    :type bucket: str
    :param chunk_size: size of a chunk in the multipart upload in bytes (default 5MB)
    :type chunk_size: int
    :param time_rotation: interval in seconds to rotate the file by (default 12 hours)
    :type time_rotation: int
    :param max_file_size_bytes: maximum file size in bytes before rotation (default 100MB)
    :type max_file_size_bytes: int
    :param encoder: the encoder to be used for log records (default 'utf-8')
    :type encoder: str
    :param max_threads: the number of threads the stream handler runs for file and chunk
        rotation tasks; only useful when emitting lots of records (default 1)
    :type max_threads: int
    :param compress: whether to save a compressed, gz-suffixed file
    :type compress: bool
    :param boto_session_kwargs: additional keyword arguments for the AWS S3 Resource
    :type boto_session_kwargs: boto3 resource keyword arguments
    """
    args_validation = (
        ValidationRule(time_rotation, is_positive_int, bad_integer_err('time_rotation')),
        ValidationRule(max_file_size_bytes, is_positive_int, bad_integer_err('max_file_size_bytes')),
        ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
        ValidationRule(max_threads, is_positive_int, bad_integer_err('max_threads')),
    )

    for rule in args_validation:
        assert rule.func(rule.arg), rule.message

    self.bucket = bucket
    self.stream = S3Stream(self.bucket, key, chunk_size=chunk_size, max_file_log_time=time_rotation,
                           max_file_size_bytes=max_file_size_bytes, encoder=encoder,
                           workers=max_threads, compress=compress, **boto_session_kwargs)

    # Make sure we gracefully clear the buffers and upload the missing parts before exiting
    signal.signal(signal.SIGTERM, self._teardown)
    signal.signal(signal.SIGINT, self._teardown)
    signal.signal(signal.SIGQUIT, self._teardown)
    atexit.register(self.close)

    StreamHandler.__init__(self, self.stream)
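
# A usage sketch for the constructor above, assuming the enclosing class is a logging
# handler named S3Handler (class name and import path are hypothetical). The handler
# buffers records and rotates them into multipart uploads under the given object key.
if __name__ == '__main__':
    import logging
    from my_handlers import S3Handler  # hypothetical module; adjust to the real package

    logger = logging.getLogger('s3_example')
    logger.setLevel(logging.INFO)
    # Rotate every hour or at 50MB, whichever comes first, and gzip the uploaded files.
    logger.addHandler(S3Handler('logs/app.log', 'my-log-bucket',
                                time_rotation=3600,
                                max_file_size_bytes=50 * 1024 ** 2,
                                compress=True))
    logger.info('hello from the S3 handler')
    logging.shutdown()  # flushes and closes the handler, completing pending uploads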
def __init__(self, file_path, bucket, key_id, secret, chunk_size=DEFAULT_CHUNK_SIZE,
             time_rotation=DEFAULT_ROTATION_TIME_SECS, max_file_size_bytes=MAX_FILE_SIZE_BYTES,
             encoder='utf-8', max_threads=3, compress=False):
    """
    :param file_path: The path of the S3 object
    :param bucket: The id of the S3 bucket
    :param key_id: Authentication key
    :param secret: Authentication secret
    :param chunk_size: Size of a chunk in the multipart upload in bytes - default 5MB
    :param time_rotation: Interval in seconds to rotate the file by - default 12 hours
    :param max_file_size_bytes: Maximum file size in bytes before rotation - default 100MB
    :param encoder: The encoder to be used for log records - default 'utf-8'
    :param max_threads: The number of threads the stream handler runs for file and chunk rotation tasks
    :param compress: Boolean indicating whether to save a compressed, gz-suffixed file
    """
    args_validation = (
        ValidationRule(file_path, is_non_empty_string, empty_str_err('file_path')),
        ValidationRule(bucket, is_non_empty_string, empty_str_err('bucket')),
        ValidationRule(key_id, is_non_empty_string, empty_str_err('key_id')),
        ValidationRule(secret, is_non_empty_string, empty_str_err('secret')),
        ValidationRule(chunk_size, is_positive_int, bad_integer_err('chunk_size')),
        ValidationRule(time_rotation, is_positive_int, bad_integer_err('time_rotation')),
        ValidationRule(max_file_size_bytes, is_positive_int, bad_integer_err('max_file_size_bytes')),
        ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
        ValidationRule(max_threads, is_positive_int, bad_integer_err('max_threads')),
        ValidationRule(compress, is_boolean, bad_type_error('compress', 'boolean'))
    )

    for rule in args_validation:
        assert rule.func(rule.arg), rule.message

    self.bucket = bucket
    self.secret = secret
    self.key_id = key_id
    self.stream = S3Streamer(self.bucket, file_path, chunk_size=chunk_size, max_file_log_time=time_rotation,
                             max_file_size_bytes=max_file_size_bytes, encoder=encoder,
                             workers=max_threads, compress=compress,
                             key_id=self.key_id, secret=self.secret)

    # Make sure we gracefully clear the buffers and upload the missing parts before exiting
    signal.signal(signal.SIGTERM, self.close)
    signal.signal(signal.SIGINT, self.close)
    signal.signal(signal.SIGQUIT, self.close)
    atexit.register(self.close)

    StreamHandler.__init__(self, self.stream)
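
# A usage sketch for the key-based constructor above, the older variant that takes the
# AWS access key id and secret explicitly instead of **boto_session_kwargs. The class
# name S3Handler and the import path are hypothetical; credentials are read from the
# environment here rather than hard-coded.
if __name__ == '__main__':
    import logging
    import os
    from my_handlers_legacy import S3Handler  # hypothetical module; adjust to the real package

    logger = logging.getLogger('s3_legacy_example')
    logger.setLevel(logging.INFO)
    logger.addHandler(S3Handler('logs/app.log', 'my-log-bucket',
                                key_id=os.environ['AWS_ACCESS_KEY_ID'],
                                secret=os.environ['AWS_SECRET_ACCESS_KEY'],
                                compress=True))
    logger.info('hello from the legacy S3 handler')
    logging.shutdown()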
def __init__(self, key_id, secret, stream_name, region, partition='single',
             chunk_size=DEFAULT_CHUNK_SIZE, encoder='utf-8', workers=3):
    """
    :param key_id: Authentication key
    :param secret: Authentication secret
    :param stream_name: The name of the Kinesis stream
    :param region: The AWS region of the Kinesis stream
    :param partition: A partition name, for the case where multiple shards are used
    :param chunk_size: Size of a chunk in the multipart upload in bytes - default 5MB
    :param encoder: The encoder to be used for log records - default 'utf-8'
    :param workers: The number of threads the stream handler runs for file and chunk rotation tasks
    """
    args_validation = (
        ValidationRule(key_id, is_non_empty_string, empty_str_err('key_id')),
        ValidationRule(secret, is_non_empty_string, empty_str_err('secret')),
        ValidationRule(stream_name, is_non_empty_string, empty_str_err('stream_name')),
        ValidationRule(region, is_non_empty_string, empty_str_err('region')),
        ValidationRule(partition, is_non_empty_string, empty_str_err('partition')),
        ValidationRule(chunk_size, is_positive_int, bad_integer_err('chunk_size')),
        ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
        ValidationRule(workers, is_positive_int, bad_integer_err('workers')),
    )

    for rule in args_validation:
        assert rule.func(rule.arg), rule.message

    self.secret = secret
    self.key_id = key_id
    self.stream = KinesisStreamer(self.key_id, self.secret, stream_name, region, partition,
                                  chunk_size, encoder, workers=workers)

    # Make sure we gracefully clear the buffers and upload the missing parts before exiting
    signal.signal(signal.SIGTERM, self.close)
    signal.signal(signal.SIGINT, self.close)
    signal.signal(signal.SIGQUIT, self.close)
    atexit.register(self.close)

    StreamHandler.__init__(self, self.stream)
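
# A usage sketch for the key-based Kinesis constructor above (class name KinesisHandler
# and import path are hypothetical; credentials come from the environment, and the
# stream name, region, and partition values are placeholders).
if __name__ == '__main__':
    import logging
    import os
    from my_handlers_legacy import KinesisHandler  # hypothetical module; adjust to the real package

    logger = logging.getLogger('kinesis_legacy_example')
    logger.setLevel(logging.INFO)
    logger.addHandler(KinesisHandler(os.environ['AWS_ACCESS_KEY_ID'],
                                     os.environ['AWS_SECRET_ACCESS_KEY'],
                                     'app-logs', 'us-east-1', partition='single'))
    logger.info('hello from the legacy Kinesis handler')
    logging.shutdown()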
             max_file_size_bytes=MAX_FILE_SIZE_BYTES, encoder='utf-8', max_threads=3, compress=False):
    """
    :param file_path: The path of the S3 object
    :param bucket: The id of the S3 bucket
    :param chunk_size: Size of a chunk in the multipart upload in bytes - default 5MB
    :param time_rotation: Interval in seconds to rotate the file by - default 12 hours
    :param max_file_size_bytes: Maximum file size in bytes before rotation - default 100MB
    :param encoder: The encoder to be used for log records - default 'utf-8'
    :param max_threads: The number of threads the stream handler runs for file and chunk rotation tasks
    :param compress: Boolean indicating whether to save a compressed, gz-suffixed file
    """
    args_validation = (
        ValidationRule(file_path, is_non_empty_string, empty_str_err('file_path')),
        ValidationRule(bucket, is_non_empty_string, empty_str_err('bucket')),
        ValidationRule(chunk_size, is_positive_int, bad_integer_err('chunk_size')),
        ValidationRule(time_rotation, is_positive_int, bad_integer_err('time_rotation')),
        ValidationRule(max_file_size_bytes, is_positive_int, bad_integer_err('max_file_size_bytes')),
        ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
        ValidationRule(max_threads, is_positive_int, bad_integer_err('max_threads')),
        ValidationRule(compress, is_boolean, bad_type_error('compress', 'boolean'))
    )

    for rule in args_validation:
        assert rule.func(rule.arg), rule.message

    self.bucket = bucket
    self.stream = S3Streamer(self.bucket, file_path, chunk_size, time_rotation,
                             ServerSideEncryption=ServerSideEncryption, SSEKMSKeyId=SSEKMSKeyId,