def get_or_create_stream(stream_name, shard_count, client=None):
    """Return the description of a Kinesis stream, creating it if it is missing.

    Args:
        stream_name: Name of the stream to look up or create.
        shard_count: Number of shards requested if the stream has to be created.
        client: Optional Kinesis connection to use. When omitted, the
            module-level ``kinesis`` connection is used.

    Returns:
        The ``describe_stream`` response dict. When the stream had to be
        created, the function polls until its status is ``ACTIVE`` before
        returning.

    Raises:
        Any exception from the connection other than
        ``ResourceNotFoundException`` on the initial lookup.
    """
    conn = kinesis if client is None else client
    try:
        stream = conn.describe_stream(stream_name)
        print(json.dumps(stream, sort_keys=True, indent=2, separators=(',', ':')))
    except ResourceNotFoundException:
        print('Could not find ACTIVE stream:{0} trying to create.'.format(stream_name))
        # Issue the create request once; repeating it while the stream is
        # still being created would be rejected as "resource in use".
        conn.create_stream(stream_name, shard_count)
        while True:
            time.sleep(0.5)
            stream = conn.describe_stream(stream_name)
            # The status lives under 'StreamDescription'; compare by value,
            # not identity.
            if stream['StreamDescription']['StreamStatus'] == 'ACTIVE':
                break
    return stream
from boto import kinesis

# Profile name; assigned here but not referenced by the statements below.
aws_profile = "learner"

# Rebind ``kinesis`` from the boto module to a connection for us-east-1.
kinesis = kinesis.connect_to_region(region_name="us-east-1")

# Fetch the stream description and print it.
description = kinesis.describe_stream(stream_name="pharmacy-records")
print(description)
# python readstream.py # # Note that boto must be installed: # pip install boto from boto import kinesis import time # boto kinesis API kinesis = kinesis.connect_to_region("eu-west-1") # Stream to read from stream = 'logbuffer-dev' # Lookup shards of stream shards = kinesis.describe_stream(stream)["StreamDescription"]["Shards"] n_shards = len(shards) iterators = [] # create iterators for each shard for shard in shards: shard_id = shard["ShardId"] # iterators.append(kinesis.get_shard_iterator(stream, shard_id, "AT_SEQUENCE_NUMBER", shard["SequenceNumberRange"]["StartingSequenceNumber"])["ShardIterator"]) iterators.append(kinesis.get_shard_iterator(stream, shard_id, "LATEST")["ShardIterator"]) # circular query of iterators while 1==1: iterator = iterators.pop(0) out = kinesis.get_records(iterator, limit=500) iterators.append(out["NextShardIterator"]) for record in out["Records"]:
from boto import kinesis
import testdata
import datetime
import json
import time


# Factory producing fake payment events: a customer id in 1..10, an amount
# in 1..100 and one of five payment gateways.
class Users(testdata.DictFactory):
    custid = testdata.RandomInteger(1, 10)
    amount = testdata.RandomInteger(1, 100)
    gateway = testdata.RandomSelection(
        ['visa', 'paypal', 'master', 'stripe', 'wallet'])


if __name__ == '__main__':
    # Rebind ``kinesis`` from the boto module to a regional connection.
    kinesis = kinesis.connect_to_region("ap-southeast-1")

    # print() is used as a single-argument call throughout so the script
    # parses on Python 3 and prints the same on Python 2.
    print(kinesis.describe_stream("payments"))
    print(kinesis.list_streams())

    # Publish ten generated events, one per second, echoing each event and
    # the put_record response.
    for user in Users().generate(10):
        print(user)
        print(kinesis.put_record("payments", json.dumps(user), "partitionkey"))
        time.sleep(1)
def kinesis_stream(stream):
    """Look up the ARN of the Kinesis stream called ``stream``.

    Opens a Kinesis connection via ``boto.connect_kinesis()``, describes
    the stream and returns the ``StreamARN`` field of its description.
    """
    conn = boto.connect_kinesis()
    description = conn.describe_stream(stream)
    return description['StreamDescription']['StreamARN']
kinesis = boto.connect_kinesis() # Create the stream for this instance ID kinesis.create_stream(stream_name, shard_count) except boto.kinesis.exceptions.ResourceInUseException: # Stream has already been created, this can be safely ignored pass tries = 0 # Try up to 10 times to open the stream while tries < 10: tries += 1 # Get the stream description response = kinesis.describe_stream(stream_name) # Is the stream active? if response['StreamDescription']['StreamStatus'] == 'ACTIVE': # Yes, the stream is active and we are ready to start reading from it. Get the shard ID. shard_id = response['StreamDescription']['Shards'][0]['ShardId'] break # The stream is not active, wait for 15 seconds and try again time.sleep(15) else: # The stream took too long to become active raise TimeoutError('Stream is still not active, aborting...') # Get the shard iterator and get only new data (TRIM_HORIZON) response = kinesis.get_shard_iterator(stream_name, shard_id, 'TRIM_HORIZON')
from boto import kinesis
import testdata
import json
import time

# Connect first: the calls below need a connection object, not the
# ``boto.kinesis`` module itself.
kinesis = kinesis.connect_to_region("eu-west-1")

# One-time stream setup, kept for reference:
#   stream = kinesis.create_stream("EdisonDemo", 1)
#   kinesis.describe_stream("EdisonDemo")
#   kinesis.list_streams()


# Factory producing fake user records (names, age 10..30, gender).
class Users(testdata.DictFactory):
    firstname = testdata.FakeDataFactory('firstName')
    lastname = testdata.FakeDataFactory('lastName')
    age = testdata.RandomInteger(10, 30)
    gender = testdata.RandomSelection(['female', 'male'])


# Publish 50 generated users to the stream, echoing each one.
for user in Users().generate(50):
    print(user)
    kinesis.put_record("EdisonDemo", json.dumps(user), "partitionKey")

shard_id = 'shardId-000000000000'
# NOTE(review): a LATEST iterator obtained after the puts above presumably
# only sees records written later - confirm this is the intent.
shard_it = kinesis.get_shard_iterator("EdisonDemo", shard_id, "LATEST")["ShardIterator"]

# Poll the shard, two records at a time.
while True:
    out = kinesis.get_records(shard_it, limit=2)
    print(out)
    # Advance to the next position; reusing the old iterator would re-read
    # the same place in the shard on every pass.
    shard_it = out["NextShardIterator"]
    time.sleep(0.2)
def pause_until_kinesis_active(stream):
    """Block until the Kinesis stream named ``stream`` reports ACTIVE.

    Describes the stream repeatedly; each time the status is not
    ``'ACTIVE'`` a notice is printed and the function sleeps five seconds
    before checking again.
    """
    conn = boto.connect_kinesis()
    while True:
        status = conn.describe_stream(stream)['StreamDescription']['StreamStatus']
        if status == 'ACTIVE':
            return
        print('Kinesis stream [' + stream + '] not active yet')
        time.sleep(5)
import boto.kinesis from boto.kinesis.exceptions import ResourceNotFoundException from boto.kinesis.exceptions import ProvisionedThroughputExceededException ACCESS_KEY="AKIAIMV6XL4QVZA5GB2Q" SECRET_KEY="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" region_name="us-west-2" kinesis =boto.kinesis.connect_to_region(region_name,aws_access_key_id = ACCESS_KEY,aws_secret_access_key = SECRET_KEY) streamName="sound" partitionKey="IoTExample" shardCount=1 iterator_type='LATEST' stream=kinesis.describe_stream(streamName) print(json.dumps(stream,sort_keys=True,indent=2,separators=(',',':'))) shards=stream['StreamDescription']['Shards'] print('# Shard Count:', len(shards)) def processRecords(records): for record in records: text=record['Data'].lower() print 'Processing record with data:' + text i=0 response=kinesis.get_shard_iterator(streamName,shards[0]['ShardId'], 'TRIM_HORIZON',starting_sequence_number=None) next_iterator=response['ShardIterator'] print('Gettinng next records using iterator:', next_iterator) while i<4000: try:
from boto import kinesis
import time

# Rebind ``kinesis`` from the boto module to a regional connection.
kinesis = kinesis.connect_to_region("us-west-2")
stream_name = "TestStream"

# Try up to 100 times to find the stream ACTIVE and read from its shards.
tries = 0
while tries < 100:
    tries += 1
    try:
        response = kinesis.describe_stream(stream_name)
        if response['StreamDescription']['StreamStatus'] == 'ACTIVE':
            print("stream is active")
            shards = response['StreamDescription']['Shards']
            for shard in shards:
                shard_id = shard["ShardId"]
                print(repr(shard))
                shard_it = kinesis.get_shard_iterator(
                    stream_name, shard_id, "LATEST")["ShardIterator"]
                # Read until the shard yields no further iterator, which
                # lets the loop move on to the next shard.
                while shard_it is not None:
                    out = kinesis.get_records(shard_it, limit=2)
                    for o in out["Records"]:
                        print(o["Data"])
                        # Your specific data processing goes here
                    shard_it = out["NextShardIterator"]
                    # Pause between reads to stay under the per-shard
                    # read-rate limit.
                    time.sleep(0.2)
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C still stops the
        # script, and include the actual error in the message.
        print('error while trying to describe kinesis stream : %s' % exc)
    # Short delay before the next attempt instead of retrying in a tight loop.
    time.sleep(1)
import datetime import json import csv import configparser config = configparser.ConfigParser() ini = config.read('conf2.ini') AWS_ACCESS_KEY_ID = config.get('AWS Credentials', 'key') AWS_SECRET_ACCESS_KEY = config.get('AWS Credentials', 'secret') kinesis = kinesis.connect_to_region("eu-west-1", aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) print kinesis.list_streams() print kinesis.describe_stream("rawdata") initial_uid = 32767376 bruce = "awesome" while bruce == "awesome": with open("Kinesis_Test_Data.csv", 'rb') as source_file: contents = csv.reader(source_file, delimiter=',', quotechar='|') for event in contents: data = dict() initial_uid = initial_uid + 1 data['uid'] = initial_uid data['event'] = event[0]
from boto import kinesis

# Rebind ``kinesis`` from the boto module to a connection for eu-west-1.
kinesis = kinesis.connect_to_region(region_name="eu-west-1")

# Request a new single-shard stream and keep the response.
stream = kinesis.create_stream(stream_name="BotoDemo", shard_count=1)

# Describe the stream and list all streams; the results are not used here.
kinesis.describe_stream(stream_name="BotoDemo")
kinesis.list_streams()
import time

from boto import kinesis

from settings import KINESIS_REGION, KINESIS_STREAM_NAME

FLUSH_INTERVAL = 5   # seconds to wait between batches
BATCH_SIZE = 20      # maximum records requested per get_records call
running = True


def process_messages(batch_msgs):
    """Print the size of a get_records response and each record in it.

    Args:
        batch_msgs: Response dict containing a ``'Records'`` list; each
            record is read for its ``'Data'`` and ``'SequenceNumber'`` keys.
    """
    print('messages processed: {}'.format(len(batch_msgs['Records'])))

    for msg in batch_msgs['Records']:
        print('message: "{}" offset: {}'.format(msg['Data'], msg['SequenceNumber']))


print('Connect to Kinesis Streams')
# Rebind ``kinesis`` from the boto module to a regional connection.
kinesis = kinesis.connect_to_region(region_name=KINESIS_REGION)
stream = kinesis.describe_stream(KINESIS_STREAM_NAME)

# Only the first shard of the stream is consumed.
shardId = stream['StreamDescription']['Shards'][0]['ShardId']
shardIterator = kinesis.get_shard_iterator(KINESIS_STREAM_NAME, shardId, 'TRIM_HORIZON')

print('Kinesis consumer started!')
while running:
    msgs = kinesis.get_records(shardIterator['ShardIterator'], limit=BATCH_SIZE)
    process_messages(msgs)

    shardIterator['ShardIterator'] = msgs['NextShardIterator']
    if shardIterator['ShardIterator'] is None:
        # No further iterator means the shard has nothing more to read;
        # stop instead of passing None back into get_records.
        break

    print('\nnext batch in {} seconds...'.format(FLUSH_INTERVAL))
    time.sleep(FLUSH_INTERVAL)