def process_article(article_path):
    """Read one extracted article file and send it to the Kinesis stream.

    The first line of the file must match the header pattern
    ``<doc id="..." url="..." title="..."``.  The remaining lines, minus the
    last one, become the document body.  Depending on ``INSERT_IN_BATCHES``
    the JSON document is either buffered in the module-level ``doc_buffer``
    and flushed once ``BATCH_SIZE`` documents have accumulated, or sent
    immediately as a single record.

    Args:
        article_path: Path of the article file to read.
    """
    # Only doc_buffer is rebound here; the other module globals are just read.
    global doc_buffer

    with open(article_path, "r") as article_file:
        lines = article_file.read().split("\n")

    m = re.match(r"<doc id=\"([0-9]+)\" url=\"(.+)\" title=\"(.+)\"", lines[0])
    if not m:
        # Report the path; formatting the file object printed its repr.
        print("Header not found in {0}".format(article_path))
        return

    # The numeric id from the header (group 1) is not used: every document
    # gets a fresh UUID as its article_id.
    doc = {
        "article_id": str(uuid.uuid4()),
        "url": m.group(2),
        "title": m.group(3),
        "body": "\n".join(lines[1:-1]),
    }

    if INSERT_IN_BATCHES:
        doc_buffer.append(json.dumps(doc))
        # ">=" rather than "==" so an over-full buffer still gets flushed.
        if len(doc_buffer) >= BATCH_SIZE:
            kinesis.put_records(kinesis_stream, doc_buffer, str(uuid.uuid4()))
            doc_buffer = []
    else:
        kinesis.put_record(kinesis_stream, json.dumps(doc), str(uuid.uuid4()))

    # NOTE(review): assumes this message is printed for every parsed article,
    # batched or not - confirm against the original indentation.
    print("Sent article '{}' ({})".format(doc["title"], doc["article_id"]))
def put_record(self, record):
    """Put a record (<=1MB) on to a Kinesis stream.

    The record is serialized with ``json.dumps`` and sent to
    ``self.stream_name`` using the key from ``self.get_partition_key()``.
    """
    stream = self.stream_name
    payload = json.dumps(record)
    partition_key = self.get_partition_key()
    kinesis.put_record(stream, payload, partition_key)
def run():
    """Stream upvoted r/askreddit comments into the "ErikSparkPOC" stream.

    Each comment with at least one upvote is wrapped in a ``RedditComment``
    (body reduced to ASCII), JSON-encoded and put on the stream.  The loop
    stops after more than ``escape_limit`` comments have been attempted.
    """
    from boto import kinesis

    # Use a separate name so the imported module is not shadowed.
    conn = kinesis.connect_to_region("ca-central-1")
    # stream = kinesis.create_stream("ErikSparkPOC", 1)

    # NOTE(review): client id/secret are hard-coded in source; they should be
    # loaded from configuration or the environment and rotated.
    reddit = praw.Reddit(client_id='gluFwvMrQLqLuA',
                         client_secret='nowLOmNuC8tS76mrc-LQUlarngw',
                         user_agent='testscript by /u/plasmatrendybot',
                         password='******',
                         username='******')

    # Subreddit name
    subreddit = reddit.subreddit('askreddit')
    comments = subreddit.stream.comments()

    escape_limit = 100000
    x = 0
    for comment in comments:
        if comment.ups <= 0:
            continue

        print(comment.body, comment.ups)
        reddit_comment = RedditComment(
            comment.body.encode('ascii', 'ignore'))
        try:
            conn.put_record("ErikSparkPOC", json.dumps(reddit_comment),
                            "partitionkey")
        except Exception as exc:
            # Best effort: report the failure and keep streaming.  A bare
            # "except:" would also swallow KeyboardInterrupt/SystemExit and
            # make the endless comment stream impossible to stop cleanly.
            print("Failed to insert to Kinesis: {}".format(exc))

        # NOTE(review): assumes the counter only advances for upvoted
        # comments - confirm against the original indentation.
        x = x + 1
        if x > escape_limit:
            break
def extractCSV(csvfile):
    """Send every row of a comma-separated price file to "SecurityStream".

    Each row's ``Securityid``, ``Ticker``, ``Price`` and ``Date`` columns are
    passed to ``getPricesData``; the result is JSON-encoded, put on the
    stream and echoed to stdout.
    """
    with open(csvfile) as sec_file:
        for record in csv.DictReader(sec_file, delimiter=','):
            prices = getPricesData(record['Securityid'], record['Ticker'],
                                   record['Price'], record['Date'])
            payload = json.dumps(prices)
            kinesis.put_record("SecurityStream", payload, "partitionkey")
            print(payload)
def run():
    """Put ten JSON-encoded ``User("Demo")`` records on the "ErikDemo" stream."""
    from boto import kinesis

    conn = kinesis.connect_to_region("ca-central-1")
    # stream = kinesis.create_stream("ErikDemo", 1)

    for _ in xrange(10):
        demo_user = User("Demo")
        # print(json.dumps(user))
        print("Did I make it here?")
        payload = json.dumps(demo_user)
        conn.put_record("ErikDemo", payload, "partitionkey")
def runController():
    """Read the sound sensor once and put the reading on the stream.

    The record is the current UTC timestamp and the sensor value joined by
    a colon.  The sequence number from the response is printed afterwards.
    """
    reading = readSoundSensor()
    taken_at = datetime.datetime.utcnow()
    record = ":".join([str(taken_at), str(reading)])
    print("Putting record in stream:" + record)

    response = kinesis.put_record(stream_name=streamName,
                                  data=record,
                                  partition_key=partitionKey)
    sequence_number = response['SequenceNumber']
    print ("-=put seqNum:", sequence_number)
def encode_and_send_frame(frame, frame_count, enable_kinesis=True,
                          enable_rekog=False, write_file=False):
    """JPEG-encode a frame and optionally save, stream and label it.

    Args:
        frame: Image passed to ``cv2.imencode``.
        frame_count: Sequence number stored in the package and used in the
            optional output file name.
        enable_kinesis: When true, pickle the frame package and put it on
            the Kinesis stream ``stream_name``.
        enable_rekog: When true, send the JPEG bytes to
            ``rekog_client.detect_labels`` and print the response.
        write_file: When true, write the JPEG to ``img_<frame_count>.jpg``.

    Any exception is caught and printed so one bad frame does not stop the
    caller.
    """
    try:
        # convert opencv Mat to jpg image
        _, buff = cv2.imencode(".jpg", frame)
        img_bytes = bytearray(buff)

        # Seconds since the Unix epoch, computed from the UTC clock.  The
        # previous code tagged local time as UTC, which skewed the capture
        # time by the host's UTC offset.
        epoch = datetime.datetime(1970, 1, 1)
        now_ts_utc = (datetime.datetime.utcnow() - epoch).total_seconds()

        frame_package = {
            'ApproximateCaptureTime': now_ts_utc,
            'FrameCount': frame_count,
            'ImageBytes': img_bytes
        }

        if write_file:
            print("Writing file img_{}.jpg".format(frame_count))
            # JPEG data is binary: open in 'wb' and close via the context
            # manager even if the write fails.
            with open("img_{}.jpg".format(frame_count), 'wb') as target:
                target.write(img_bytes)

        # Put encoded image in kinesis stream
        if enable_kinesis:
            print("....Sending image to Kinesis")
            response = kinesis.put_record(
                stream_name=stream_name,  # StreamName in boto3
                data=cPickle.dumps(frame_package, 0),  # Data in boto3
                partition_key="partitionkey"  # PartitionKey in boto3
            )
            print('Response: \n', response)

        if enable_rekog:
            response = rekog_client.detect_labels(
                Image={'Bytes': img_bytes},
                MaxLabels=rekog_max_labels,
                MinConfidence=rekog_min_conf)
            print(response)

    except Exception as e:
        # Deliberate best effort: report the failure and return.
        print(e)
from boto import kinesis

from settings import KINESIS_PARTITION_KEY, KINESIS_STREAM_NAME, KINESIS_REGION

# Number of numbered "hello world" messages to publish.
MESSAGE_COUNT = 1000

print('Kinesis stream producer started!')

conn = kinesis.connect_to_region(KINESIS_REGION)

# Messages are numbered from 1 to MESSAGE_COUNT.
for number in range(1, MESSAGE_COUNT + 1):
    message = 'hello world - {}'.format(number)
    conn.put_record(KINESIS_STREAM_NAME, message, KINESIS_PARTITION_KEY)
    print('{}/{} - {}'.format(number, MESSAGE_COUNT, message))
from lib.users import Users
import json
from boto import kinesis

aws_region = "us-west-2"
user = "******"
password = "******"
interval = 1500
count = 100
stream_name = "TestStream"

u = Users(user, password, interval, count)
x = u.list()

# Connect once, under a separate name.  Rebinding ``kinesis`` inside the loop
# replaced the module with a connection object, so the second iteration
# failed when it called ``connect_to_region`` on that connection.
connection = kinesis.connect_to_region(aws_region)

for line in x.iter_lines():
    # Skip empty keep-alive lines instead of sending empty records.
    if line:
        connection.put_record(stream_name, line, "partitionkey")
        print(line)
from boto import kinesis
import testdata,json

STREAM_NAME = "EdisonDemo"
PARTITION_KEY = "partitionkey"


# Creating fake data
class Users(testdata.DictFactory):
    firstname = testdata.FakeDataFactory("firstName")
    lastname = testdata.FakeDataFactory("lastName")
    age = testdata.RandomInteger(10,30)
    gender = testdata.RandomSelection(['female','male'])


# Using boto connect to the region in which your kinesis stream is created
conn = kinesis.connect_to_region("eu-west-1")

# Echo each of the 50 generated users, then put it on the stream as JSON.
for fake_user in Users().generate(50):
    print(fake_user)
    conn.put_record(STREAM_NAME, json.dumps(fake_user), PARTITION_KEY)
from boto import kinesis
import testdata
import datetime
import json
import time


class Users(testdata.DictFactory):
    # Fields of one generated payment record.
    custid = testdata.RandomInteger(1, 10)
    amount = testdata.RandomInteger(1, 100)
    gateway = testdata.RandomSelection(
        ['visa', 'paypal', 'master', 'stripe', 'wallet'])


if __name__ == '__main__':
    conn = kinesis.connect_to_region("ap-southeast-1")
    print(conn.describe_stream("payments"))
    print(conn.list_streams())

    # NOTE(review): assumes the put and the one-second pause both sit inside
    # the loop - confirm against the original indentation.
    for payment in Users().generate(10):
        print(payment)
        put_response = conn.put_record("payments", json.dumps(payment),
                                       "partitionkey")
        print(put_response)
        time.sleep(1)
import json
import time  # needed for time.sleep below; it was called but never imported

import requests
#import testdata
from boto import kinesis

#connecting to Kinesis stream
region = 'us-east-1'
kinesisStreamName = 'kinesis-demo'
kinesis = kinesis.connect_to_region(region)
# NOTE(review): this value is used as the partition key even though it is
# spelled like a shard id - confirm that is intended.
partitionKey = 'shardId-000000000000'

# generating data and feeding kinesis.
while True:
    response = requests.get(
        'https://chasing-coins.com/api/v1/top-coins/20').json()
    for coin in response:
        data = json.dumps(response[coin])
        print(data)
        result = kinesis.put_record(kinesisStreamName, data, partitionKey)
        # NOTE(review): assumes the pause applies per record - confirm
        # against the original indentation.
        time.sleep(0.2)

# class Users(testdata.DictFactory):
#     firstname = testdata.FakeDataFactory('firstName')
#     lastname = testdata.FakeDataFactory('lastName')
#     age = testdata.RandomInteger(10, 30)
#     gender = testdata.RandomSelection(['female', 'male'])
# for user in Users().generate(50):
#     print(user)
#     kinesis.put_record(kinesisStreamName, json.dumps(user), partitionKey)
kinesis = kinesis.connect_to_region(region)

# Candidate sites for the generated URLs.
urls = ['foo.com', 'amazon.com', 'testing.com', 'google.com', 'sydney.com']

# generating data and feeding kinesis.
while True:
    path_part = random_generator(10, "techsummit2015")
    site_index = random.randint(0, 4)
    userid = random.randint(25, 35) + 1200

    # Timestamp as month/day/year hour:minute:second, no zero padding.
    now = datetime.now()
    timeformatted = "{}/{}/{} {}:{}:{}".format(
        now.month, now.day, now.year, now.hour, now.minute, now.second)

    # building the payload for kinesis puts.
    # schema of the input string: userid,url,timestamp
    url = 'www.' + urls[site_index] + '/' + path_part
    payload = str(userid) + ',' + url + ',' + timeformatted
    partition_key = random.choice('abcdefghij')

    print(payload)
    result = kinesis.put_record(kinesisStreamName, payload, partition_key)
    print(result)
# The stream is named after this instance's ID, taken from the metadata.
instance_id = instance_metadata['instance-id']
stream_name = instance_id

# Use only one shard
shard_count = 1

try:
    # Connect to Kinesis and create the stream for this instance ID
    kinesis = boto.connect_kinesis()
    kinesis.create_stream(stream_name, shard_count)
except boto.kinesis.exceptions.ResourceInUseException:
    # Stream has already been created, this can be safely ignored
    pass

# Forward stdin to Kinesis line by line until readline() returns an empty
# string.  The stream name is also used as the partition key.
for line in iter(sys.stdin.readline, ''):
    kinesis.put_record(stream_name, line, stream_name)
from boto import kinesis
import testdata
import json

STREAM_NAME = "push-notifications"
PARTITION_KEY = "123"

conn = kinesis.connect_to_region("us-east-1")


class Users(testdata.DictFactory):
    # Fields of one generated fake user.
    firstname = testdata.FakeDataFactory('firstName')
    lastname = testdata.FakeDataFactory('lastName')
    age = testdata.RandomInteger(10, 30)
    gender = testdata.RandomSelection(['female', 'male'])


# Echo each of the 10 generated users, then put it on the stream as JSON.
for fake_user in Users().generate(10):
    print(fake_user)
    payload = json.dumps(fake_user)
    conn.put_record(STREAM_NAME, payload, PARTITION_KEY)
# import testdata
import json
from boto import kinesis
import sys
# seed the pseudorandom number generator
from random import seed
from random import randint
import time
import random

conn = kinesis.connect_to_region("us-east-2")
print(conn.list_streams())

seed(1)

# Emit one numbered fake heart-rate reading every 0.2 seconds, forever.
sample_number = 0
while True:
    reading = {
        "timestamp": int(time.time()),
        "dataNum": "data" + str(sample_number),
        "device_name": "dev",
        "HeartRate": random.randint(60, 120),
    }
    print("loading ", json.dumps(reading))
    conn.put_record("end-stream", json.dumps(reading), "partitionkey")
    time.sleep(0.2)
    sample_number += 1
kinesis = kinesis.connect_to_region(region)

# Candidate sites for the generated URLs.
urls = ['foo.com', 'amazon.com', 'testing.com', 'google.com', 'sydney.com']

# generating data and feeding kinesis.
while True:
    suffix = random_generator(10, "techsummit2015")
    pick = random.randint(0, 4)
    userid = random.randint(25, 35) + 1200

    # Timestamp as month/day/year hour:minute:second, no zero padding.
    now = datetime.now()
    date_part = "/".join([str(now.month), str(now.day), str(now.year)])
    time_part = ":".join([str(now.hour), str(now.minute), str(now.second)])
    timeformatted = date_part + " " + time_part

    # building the payload for kinesis puts.
    # schema of the input string: userid,url,timestamp
    record = str(userid) + ',' + 'www.' + urls[pick] + '/' + suffix + ',' + timeformatted
    shard_key = random.choice('abcdefghij')

    print(record)
    result = kinesis.put_record(kinesisStreamName, record, shard_key)
    print(result)
from lib.users import Users
import json
from boto import kinesis

user = "******"
password = "******"
interval = 1500
count = 100
stream_name = "TestStream"

u = Users(user, password, interval, count)
x = u.list()

# Connect once, under a separate name.  Rebinding ``kinesis`` inside the loop
# replaced the module with a connection object, so the second iteration
# failed when it called ``connect_to_region`` on that connection.
connection = kinesis.connect_to_region("eu-west-1")

for line in x.iter_lines():
    # Skip empty keep-alive lines instead of sending empty records.
    if line:
        connection.put_record(stream_name, line, "partitionkey")
        print(line)
import testdata
import json
from boto import kinesis

STREAM_NAME = "BotoDemo"
PARTITION_KEY = "partitionkey"

conn = kinesis.connect_to_region("eu-west-1")


class Users(testdata.DictFactory):
    # Fields of one generated fake user.
    firstname = testdata.FakeDataFactory('firstName')
    lastname = testdata.FakeDataFactory('lastName')
    age = testdata.RandomInteger(10, 30)
    gender = testdata.RandomSelection(['female', 'male'])


# Echo each of the 5000 generated users, then put it on the stream as JSON.
for fake_user in Users().generate(5000):
    print(fake_user)
    payload = json.dumps(fake_user)
    conn.put_record(STREAM_NAME, payload, PARTITION_KEY)
import json
import datetime
import random
import testdata
from boto import kinesis

conn = kinesis.connect_to_region("us-west-2")


def getData(iotName, lowVal, highVal):
    """Return a reading dict with a random integer in [lowVal, highVal]."""
    return {
        "iotName": iotName,
        "iotValue": random.randint(lowVal, highVal),
    }


# Endless feed: about 1% of readings are drawn from the 100-120 range and
# flagged as anomalies; the rest come from the 10-20 range.
while 1:
    is_anomaly = random.random() < 0.01
    if is_anomaly:
        low, high = 100, 120
    else:
        low, high = 10, 20

    data = json.dumps(getData("DemoSensor", low, high))
    conn.put_record("RawStreamData", data, "DemoSensor")

    if is_anomaly:
        print('***************************** anomaly ************************* ' + data)
    else:
        print(data)
bruce = "awesome"

# Replay the CSV file over and over: the loop condition never changes.
while bruce == "awesome":
    with open("Kinesis_Test_Data.csv", 'rb') as source_file:
        for event in csv.reader(source_file, delimiter=',', quotechar='|'):
            # Every event gets the next uid and the current local time.
            initial_uid = initial_uid + 1
            stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            data = {}
            data['uid'] = initial_uid
            data['event'] = event[0]
            data['timestamp'] = str(stamp)
            data['unit'] = event[1]
            data['package'] = event[2]
            data['price'] = event[3]
            data['platform'] = event[4]

            json_data = json.dumps(data, ensure_ascii=False)
            print(json_data)
            kinesis.put_record("rawdata", json_data, "partitionkey")

            # NOTE(review): assumes the shard iterator is fetched after
            # every put - confirm against the original indentation.
            shard_id = 'shardId-000000000000'
            iterator_response = kinesis.get_shard_iterator(
                "rawdata", shard_id, "LATEST")
            shard_it = iterator_response["ShardIterator"]
            print(shard_it)