def test_kcl_py_integration_test_perfect_input():
    """Feed well-formed messages through a KCLProcess and check every response.

    The input messages are serialized one JSON document per line, the process
    is run to completion, and the JSON documents written to the output file
    are compared against ``test_output_messages``.
    """
    serialized_input = "\n".join(
        json.dumps(message) for message in test_input_messages)
    input_file = make_io_obj(serialized_input)
    output_file = make_io_obj()
    error_file = make_io_obj()

    kcl.KCLProcess(
        RecordProcessor(test_shard_id, test_sequence_number),
        input_file=input_file,
        output_file=output_file,
        error_file=error_file,
    ).run()

    # Compare parsed JSON rather than raw text: the strings are only
    # approximately the same, modulo whitespace.
    responses = [
        json.loads(line)
        for line in output_file.getvalue().split("\n")
        if line != ""
    ]
    assert len(responses) == len(test_output_messages)
    for actual, expected in zip(responses, test_output_messages):
        assert actual == expected

    # With perfect input nothing is expected in the error file.
    assert error_file.getvalue() == ""
def test_kcl_process_exits_on_action_message_exception():
    """Check that run() raises dispatch.MalformedAction for an unknown action."""
    record_processor = Mock()  # type: kcl.RecordProcessorBase

    # The "invalid" action is the only thing this test relies on; the rest of
    # the payload is filler.
    bad_message = ('{"action":"invalid", "error" : "badstuff", '
                   '"sequenceNumber": "1234", "subSequenceNumber": 1}')
    kcl_process = kcl.KCLProcess(record_processor,
                                 input_file=make_io_obj(bad_message),
                                 output_file=make_io_obj(),
                                 error_file=make_io_obj())

    try:
        kcl_process.run()
    except dispatch.MalformedAction:
        # Expected outcome: the malformed action surfaced as an exception.
        return
    assert 0, 'Should have received an exception here'
def test_kcl_process_exits_on_record_processor_exception():
    """Check that a record processor failure is reported, not propagated.

    The mocked processor raises from ``initialize``; ``run()`` must not let
    that exception escape, and its message must show up in the error file.
    """
    unique_string = "Super uniqe statement we can look for"
    error_file = make_io_obj()

    class ClientException(Exception):
        pass

    record_processor = Mock()  # type: kcl.RecordProcessorBase
    # The processor fails as soon as it is asked to initialize.
    record_processor.initialize.side_effect = [ClientException(unique_string)]

    initialize_message = ('{"action":"initialize", "shardId" : "shardid-123", '
                          '"sequenceNumber": "1234", "subSequenceNumber": 1}')
    kcl_process = kcl.KCLProcess(record_processor,
                                 input_file=make_io_obj(initialize_message),
                                 output_file=make_io_obj(),
                                 error_file=error_file)

    try:
        kcl_process.run()
    except ClientException:
        assert 0, "Should not have seen the ClientException propagate up the call stack."

    assert unique_string in error_file.getvalue(), 'We should see our error message printed to the error file'
def run_processor(log_file=None, processor_func=None):
    """Build a KinesisProcessor and run it inside a KCLProcess.

    :param log_file: Passed through as the first KinesisProcessor argument.
    :param processor_func: Passed through as the second KinesisProcessor
        argument.
    """
    record_processor = KinesisProcessor(log_file, processor_func)
    kcl.KCLProcess(record_processor).run()
#!/usr/bin/env python3
import logging
import sys

from amazon_kclpy import kcl
from samples import sample_kclpy_app


class RecordProcessor(sample_kclpy_app.RecordProcessor):
    """Sample record processor whose process_record is replaced for demo purposes."""

    def process_record(self, data, partition_key, sequence_number, sub_sequence_number):
        """
        Emit the record's data through logging, stdout and stderr.

        :param str data: The blob of data that was contained in the record.
        :param str partition_key: The key associated with this record.
        :param int sequence_number: The sequence number associated with this record.
        :param int sub_sequence_number: The sub sequence number associated with this record.
        """
        logging.info('Here is what a logging message with a data of "%s" looks like.', data)

        stdout_message = 'Here is what it looks like when we print to stdout: data="{}"'.format(data)
        print(stdout_message, flush=True)

        stderr_message = 'Here is what it looks like when we print to stderr: data="{}"'.format(data)
        print(stderr_message, file=sys.stderr)


if __name__ == "__main__":
    kcl.KCLProcess(RecordProcessor()).run()
def main():
    """Run a KCLProcess around a KinesisToFileProcessor.

    The processor's filename prefix comes from the
    ``KINESIS_OUTPUT_FILENAME_PREFIX`` environment variable and falls back to
    ``'kinesis_dump'`` when that variable is not set.
    """
    filename_prefix = os.environ.get("KINESIS_OUTPUT_FILENAME_PREFIX", 'kinesis_dump')
    record_processor = KinesisToFileProcessor(filename_prefix=filename_prefix)
    kcl.KCLProcess(record_processor).run()
#!/usr/bin/env python3
import config
import logging

from amazon_kclpy import kcl
from record_processor import RecordProcessor

logger = logging.getLogger(__name__)


class StdOutProcessor(RecordProcessor):
    """Record processor that logs the data of every record it is given."""

    # Tunables copied from the project's config module.
    SLEEP_SECONDS = config.SLEEP_SECONDS
    CHECKPOINT_RETRIES = config.CHECKPOINT_RETRIES
    CHECKPOINT_FREQ_SECONDS = config.CHECKPOINT_FREQ_SECONDS

    def process_record(self, data: str, partition_key: str, sequence_number: int):
        """Log ``data`` at INFO level; the other arguments are not used."""
        logger.info(data)


if __name__ == "__main__":
    kcl.KCLProcess(StdOutProcessor()).run()
def main():
    """Run a KCLProcess around a Dervish built from the command line.

    The first three command-line arguments are used, in order, as the
    ``table_name``, ``s3bucket`` and ``s3path`` keyword arguments of Dervish.
    """
    table_name = sys.argv[1]
    s3bucket = sys.argv[2]
    s3path = sys.argv[3]
    record_processor = Dervish(table_name=table_name,
                               s3bucket=s3bucket,
                               s3path=s3path)
    kcl.KCLProcess(record_processor).run()
#!/usr/bin/env python
from amazon_kclpy import kcl
from base import RecordProcessor


class StdoutProcessor(RecordProcessor):
    """Record processor that prints the data of every record to stdout."""

    def process_record(self, data, partition_key, sequence_number):
        """Print ``data``; ``partition_key`` and ``sequence_number`` are not used."""
        # The parenthesised form parses on both Python 2 and Python 3 and
        # prints the same text for a single argument; the bare `print data`
        # statement is a SyntaxError on Python 3.
        print(data)


if __name__ == "__main__":
    kclprocess = kcl.KCLProcess(StdoutProcessor())
    kclprocess.run()
def shutdown(self, checkpointer, reason):
    '''
    Called by a KCLProcess instance to indicate that this record processor should shutdown. After this is called,
    there will be no more calls to any other methods of this record processor.

    Shutdown is best-effort: ordinary errors raised while checkpointing are reported on stderr and then
    suppressed. SystemExit and KeyboardInterrupt are not suppressed.

    :type checkpointer: amazon_kclpy.kcl.Checkpointer
    :param checkpointer: A checkpointer which accepts a sequence number or no parameters.

    :type reason: str
    :param reason: The reason this record processor is being shutdown, either TERMINATE or ZOMBIE. If ZOMBIE,
        clients should not checkpoint because there is possibly another record processor which has acquired the
        lease for this shard. If TERMINATE then checkpointer.checkpoint() should be called to checkpoint at the end
        of the shard so that this processor will be shutdown and new processor(s) will be created for the
        child(ren) of this shard.
    '''
    try:
        if reason == 'TERMINATE':
            # Checkpointing with no parameter will checkpoint at the
            # largest sequence number reached by this processor on this
            # shard id
            # NOTE(review): str(None) passes the literal string 'None', not
            # None, which does not match the comment above -- confirm against
            # self.checkpoint before changing it.
            self.checkpoint(checkpointer, str(None))
        else:  # reason == 'ZOMBIE'
            print('Shutting down due to failover. Will not checkpoint.')
    except Exception:
        # Keep shutdown best-effort, but leave a trace of what went wrong
        # instead of discarding it. Catching Exception rather than using a
        # bare except lets SystemExit and KeyboardInterrupt through.
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    kclprocess = kcl.KCLProcess(Dervish())
    kclprocess.run()