class Producer():
    def __init__(self):
        # acks=0 makes this fire-and-forget; linger_ms=500 lets the client
        # batch records for up to 500 ms before sending.
        self.producer = KafkaProducer(
            bootstrap_servers=["52.41.44.90:9092", "52.36.206.57:9092", "52.40.205.225:9092"],
            acks=0,
            linger_ms=500)

    def produce_msgs(self, msg_list):
        while True:
            index = random.randrange(0, 999)
            json_msg = json.dumps(msg_list[index]).encode('utf-8')
            self.producer.send(topic, json_msg)
class Producer():
    def __init__(self):
        self.producer = KafkaProducer(
            bootstrap_servers=["52.41.44.90:9092", "52.36.206.57:9092", "52.40.205.225:9092"],
            acks=0,
            linger_ms=500)

    def produce_msgs(self, msg_list):
        while True:
            index = random.randrange(0, 999)
            info = person_pb2.PersonInfo()
            # Populate a new repeated "user" entry, then serialize that entry
            # (the original snippet referenced an undefined `user` variable).
            user = info.user.add()
            serialize_protobuf(user, msg_list[index])
            _msg = user.SerializeToString()
            self.producer.send(topic, _msg)
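# serialize_protobuf is not defined in this snippet. A minimal sketch, assuming
# it simply copies matching scalar fields from the source dict onto the new
# protobuf entry (the field handling here is an assumption, not taken from person_pb2):
def serialize_protobuf(pb_entry, record):
    for field, value in record.items():
        if hasattr(pb_entry, field):
            setattr(pb_entry, field, value)  # copy only fields the message defines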
class Producer():

    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')

    def produce_msgs(self, source):
        for drop in source:
            if 'text' in drop:
                message = json.dumps(drop)
                # kafka-python expects bytes values (no value_serializer is
                # configured here), so encode both payloads before sending
                self.producer.send('Twitter-Stream', message.encode('utf-8'))
                print(message)
                self.producer.send('message-size', str(len(message)).encode('utf-8'))

    def on_status(self, status):
        print(status.text, "\n")

        #~ data ={}
        #~ data['text'] = status.text
        #~ data['created_at'] = status.created_at
        #~ data['geo'] = status.geo
        #~ data['source'] = status.source
        #~ self.db.Tweets.insert(data)

        msg =  status.text.encode('utf-8')

        producer = KafkaProducer(bootstrap_servers='0.0.0.0:9092')
        #~ producer = KafkaProducer(bootstrap_servers=['broker1:1234'])
        #print(msg)
        try:
            producer.send('twitterstream', msg)  # topic names must be str, not bytes
        except Exception as err:
            print(err)
            return False
        return True
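# The on_status method above has the shape of a tweepy (3.x) StreamListener
# callback. A minimal sketch, assuming tweepy 3.x and placeholder credentials,
# of how such a callback is normally hooked up:
import tweepy

class TweetListener(tweepy.StreamListener):     # hypothetical listener class
    def on_status(self, status):
        print(status.text)                      # forward to Kafka here, as above

auth = tweepy.OAuthHandler('consumer_key', 'consumer_secret')    # placeholders
auth.set_access_token('access_token', 'access_token_secret')     # placeholders
stream = tweepy.Stream(auth=auth, listener=TweetListener())
stream.filter(track=['kafka'], languages=['en'])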
Example #5
def main():
    s3 = boto3.resource('s3')
    bucket = s3.Bucket('nyc-tlc')
    # Iterates through all the objects, doing the pagination for you. Each obj
    # is an ObjectSummary, so it doesn't contain the body. You'll need to call
    # get to get the whole body.
    kafka_params = config('kafka')
    dataset_params = config('dataset')
    producer = KafkaProducer(bootstrap_servers=kafka_params['broker'])

    for obj in bucket.objects.all():
        key = obj.key
        print(key)
        if dataset_params['driver'] not in key:
            continue
        #building absolute file name
        file_name = 's3://nyc-tlc/' + key
        #skipping header
        firstline = True
        # Processing each row in file
        for line in smart_open(file_name):

            # print(line.decode('utf8'))
            if firstline:  # skip first line
                firstline = False
                continue

            line_split = line.decode('utf8').split(",")
            if len(line_split) < 20:  # skip rows with too few columns
                continue
            if line_split[5] == '0' or line_split[6] == '0' \
                    or line_split[7] == '0' or line_split[8] == '0':
                continue
            else:
                start_point = (float(line_split[5]), float(line_split[6]))
                end_point = (float(line_split[7]), float(line_split[8]))
                # print(start_point, end_point)
                intermediate_points = getEquidistantPoints(
                    start_point, end_point, 100)
                # print(intermediate_points)
                #message when trip is started
                trip_id = 'drive:' + str(datetime.now()) + ":" + str(
                    random.randint(1, 1000))
                formatted_message = format_message(trip_id, start_point,
                                                   start_point, end_point,
                                                   "New")

                producer.send(kafka_params['driver_topic'],
                              formatted_message.encode('utf-8'))
                #Simulating moving car by sending intermediate points
                for int_point in intermediate_points:
                    # print(int_point)

                    formatted_message = format_message(trip_id, start_point,
                                                       int_point, end_point,
                                                       "In Progress")

                    producer.send(kafka_params['driver_topic'],
                                  formatted_message.encode('utf-8'))
                #Ending the driver trip
                formatted_message = format_message(trip_id, start_point,
                                                   end_point, end_point,
                                                   "Closed")
                print(formatted_message.encode('utf-8'))
                producer.send(kafka_params['driver_topic'],
                              formatted_message.encode('utf-8'))
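# Neither getEquidistantPoints nor format_message is defined in this snippet.
# Minimal sketches under the assumption that the former linearly interpolates
# between two (lon, lat) pairs and the latter packs the trip state into a CSV
# line; the names and field order below are assumptions:
def getEquidistantPoints(start_point, end_point, n):
    # n evenly spaced points strictly between start and end (plain linear
    # interpolation; fine for short trips, ignores Earth curvature).
    return [(start_point[0] + (end_point[0] - start_point[0]) * i / (n + 1),
             start_point[1] + (end_point[1] - start_point[1]) * i / (n + 1))
            for i in range(1, n + 1)]

def format_message(trip_id, start_point, current_point, end_point, status):
    # Hypothetical layout: trip id, start, current position, destination, status.
    return ",".join(map(str, [trip_id,
                              start_point[0], start_point[1],
                              current_point[0], current_point[1],
                              end_point[0], end_point[1],
                              status]))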
Example #6
        row_i = 0
        time.sleep(config.PRODUCER_SLEEP_TIME)
        #print('column:', column)
        for row in df[column]:
            #print('row: ', row)
            #print('col:',column)
            #timestamp = datetime.now().strftime("%H%M%S%f")
            timestamp_s = float(datetime.now().strftime("%M")) * 60 + float(
                datetime.now().strftime("%S.%f"))
            longitude = row.split(',')[0]
            latitude = row.split(',')[1]
            #user_id = filenames[row_i]
            #message_to_send = str_fmt.format(users[row_i],, timestamp, longitude, latitude, int(column==0))
            message_to_send = str_fmt.format(users[row_i], timestamp_s,
                                             longitude, latitude,
                                             int(column == 0))
            #print(users[row_i], message_to_send)

            producer.send(topic=config.KAFKA_TOPIC,
                          value=message_to_send,
                          key=users[row_i].encode('utf-8'))
            row_i += 1

    end_time = float(datetime.now().strftime("%M")) * 60 + float(
        datetime.now().strftime("%S.%f"))
    print('end time: ', end_time)
    total_time = end_time - start_time
    messages_per_second = (num_rows * num_files) / total_time
    print('total_time: ', total_time)
    print('messages_per_second: ', messages_per_second)
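# This fragment sits inside an outer loop (something like "for column in
# df.columns:") and relies on names defined earlier in the original script.
# A minimal sketch of that assumed setup; every value below is an assumption:
from datetime import datetime

import pandas as pd
from kafka import KafkaProducer

class config:                                  # stand-in for the real config module
    KAFKA_TOPIC = 'gps-trace'
    PRODUCER_SLEEP_TIME = 1

producer = KafkaProducer(bootstrap_servers='localhost:9092',
                         value_serializer=lambda v: v.encode('utf-8'))
str_fmt = '{},{},{},{},{}'                     # user id, timestamp, lon, lat, flag
df = pd.DataFrame({0: ['-122.33,47.61', '-122.34,47.62']})   # "lon,lat" strings
users = ['user-0', 'user-1']                   # one id per replayed row
num_rows, num_files = len(df), 1
start_time = float(datetime.now().strftime("%M")) * 60 + float(
    datetime.now().strftime("%S.%f"))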
Example #7
    logger.info(
        "Start MongoDB Change Streams Service for table {}...".format(col_tmp))

    with col.watch(pipeline, full_document='updateLookup') as stream:

        for change in stream:
            update_operations = list()
            #delete_operations = list()
            logger.info(change)
            msg = str(change.get('documentKey').get('_id')) + ',' + str(
                change.get('clusterTime').time)
            topic = change.get('ns').get('coll')
            producer = KafkaProducer(bootstrap_servers=['172.16.42.3:9092'])
            producer.send(topic,
                          key=json.dumps(change.get('ns')).encode('utf-8'),
                          value=json.dumps(msg).encode('utf-8'),
                          partition=0)
            producer.close()
            record = change.get('fullDocument')
            record.pop('_id')
            record.pop('createdTime')
            update_op = UpdateOne(
                {'id': record['id']},
                {
                    '$set': record,
                    '$setOnInsert': {
                        'createdTime': datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
                    }
                },
                upsert=True)
            #delete_op = DeleteOne({'id':record['id']})
            update_operations.append(update_op)
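# The excerpt ends before update_operations is applied. A minimal sketch of how
# such a batch is typically flushed with pymongo; the target collection name
# col_target is an assumption, not taken from the original project:
if update_operations:
    result = col_target.bulk_write(update_operations, ordered=False)
    logger.info('matched={} modified={} upserted={}'.format(
        result.matched_count, result.modified_count, result.upserted_count))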
Example #8
import sys
import json

from twitter import OAuth, TwitterStream  # python "twitter" (Python Twitter Tools) package
from kafka.producer import KafkaProducer

config = {
    'access_key': 'your_access_key_here',
    'access_secret': 'your_access_secret_key_here',
    'consumer_key': 'your_consumer_key_here',
    'consumer_secret': 'your_consumer_secret_key_here'
}

auth = OAuth(config["access_key"], config["access_secret"],
             config["consumer_key"], config["consumer_secret"])
stream = TwitterStream(domain='userstream.twitter.com', auth=auth, secure=True)

search_term = "coffee, cappuccino, espresso, frappuccino, mocha, \
               tea, matcha, chai, oolong, pu-erh, tisane, \
               milk, dairy, half-and-half, \
               soda, coke, cola, fanta, sprite, pepsi, Dr pepper, soft drink, \
               juice, oj, cider, \
               wine, riesling, merlot, syrah, chardonnay, sauvignon, pinot noir, \
               beer, brew, amber, ipa, bud light, budweiser, miller lite, corona extra, heineken, \
               liquor, sake, shochu, whisky, tequila, gin, cognac, rum"

tweet_iter = stream.statuses.filter(track=search_term, language='en')

producer = KafkaProducer(bootstrap_servers='your_aws_cluster_public_IP:9092')

for tweet in tweet_iter:
    print(tweet)
    producer.send('insight_topic', json.dumps(tweet).encode('utf-8'))
Example #9
class MyKafkaProducer(object):
    """
    class that implements Kafka producers that ingest data from S3 bucket
    """
    def __init__(self, kafka_configfile, schema_file, s3_configfile):
        """
        class constructor that initializes the instance according to the configurations
        of the S3 bucket and Kafka
        :type kafka_configfile: str     path to kafka config file
        :type schema_file     : str     path to schema file
        :type s3_configfile   : str     path to S3 config file
        """
        self.kafka_config = utility.parse_config(kafka_configfile)

        self.schema = utility.parse_config(schema_file)
        self.s3_config = utility.parse_config(s3_configfile)

        self.producer = KafkaProducer(
            bootstrap_servers=self.kafka_config["BROKERS_IP"])

    def get_key(self, msg):
        """
        produces key for message to Kafka topic
        :type msg: dict     message for which to generate the key
        :rtype   : bytes    key for the message (Kafka keys must be bytes)
        """
        msgwithkey = utility.add_block_fields(msg)
        if msgwithkey is None:
            return
        x, y = msgwithkey["block_lonid"], msgwithkey["block_latid"]
        return str((x * 137 + y) % 77703).encode()

    def produce_msgs(self):
        """
        produces messages and sends them to topic
        """
        msg_cnt = 0

        while True:

            s3 = boto3.client('s3')
            # obj = s3.get_object(Bucket=self.s3_config["BUCKET"],
            #                     Key="{}/{}".format(self.s3_config["FOLDER"],
            #                                        self.s3_config["STREAMING_FILE"]))

            obj = s3.get_object(Bucket='nyctaxitrip',
                                Key="{}/{}".format(
                                    'yellow_trip',
                                    'yellow_tripdata_sample.csv'))

            for line in lazyreader.lazyread(obj['Body'], delimiter='\n'):

                message_info = line.strip()
                msg = utility.map_schema(message_info, self.schema)

                if msg is not None:
                    self.producer.send(self.kafka_config["TOPIC"],
                                       value=json.dumps(msg).encode('utf-8'),
                                       key=self.get_key(msg))
                    msg_cnt += 1

                time.sleep(0.001)
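# A minimal usage sketch for the class above; the three config file paths are
# placeholders, not paths from the original project.
if __name__ == "__main__":
    producer = MyKafkaProducer("config/kafka.config",
                               "config/schema.config",
                               "config/s3.config")
    producer.produce_msgs()  # runs until interrupted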
Example #10
class AIMSDownsamplingTCPServerConsumer:
    LOG_FORMAT ="{} UTC_TS\t"\
                "{}"
    INTERVAL = 60
    DELAY = 0
    MAX_CONNECTION = 32

    def __init__(self, kafka_host, kafka_port, tcp_host, tcp_port, topic,
                 log_topic):
        self.kafka_host = kafka_host
        self.kafka_port = kafka_port
        self.tcp_host = tcp_host
        self.tcp_port = tcp_port
        self.topic = topic
        self.log_topic = log_topic
        self.consumer = KafkaConsumer(
            topic,
            bootstrap_servers=["{}:{}".format(kafka_host, kafka_port)],
            enable_auto_commit=False,
            max_poll_records=1024 * 1024,
            max_partition_fetch_bytes=1024 * 1024 * 100)
        self.producer = KafkaProducer(
            bootstrap_servers=["{}:{}".format(kafka_host, kafka_port)])
        self.connections = {}
        self.sample_end_time = self.get_end_time(time())
        self.lastPolled = []

    def run(self):
        self.log("running")
        asyncio.run(self._async_run())

    async def _async_run(self):
        tcpServer = await asyncio.start_server(self.connection_handler,
                                               self.tcp_host, self.tcp_port)
        await asyncio.gather(tcpServer.serve_forever(), self.poll_from_kafka())

    async def connection_handler(self, reader, writer):
        addr = str(writer.get_extra_info("peername"))
        addr = str(writer.get_extra_info("peername"))
        # A new connection, but we can accept no more
        if addr not in self.connections and \
            len(self.connections)>=self.MAX_CONNECTION:
            self.refuse_client(addr, writer)
            return
        # Add connection
        self.add_client(addr, writer)
        # Read data from connection
        remaining_data = b""
        try:
            while True:
                data = await reader.read(1)  # read one byte at a time just to detect a closed connection
                if not data:
                    break
        except BrokenPipeError:
            # Catches "connection reset by peer" while we are sending batched
            # data, which is also when we cannot check the reader. A broken
            # connection on the writer side ultimately surfaces as a
            # BrokenPipeError on the reader side, so it is handled here.
            pass
        finally:
            self.remove_client(addr)

    async def poll_from_kafka(self):
        polled = self.consumer.poll(timeout_ms=self.INTERVAL * 1000 / 2)
        self.lastPolled = polled
        while True:
            t = time()
            if t >= self.sample_end_time + self.DELAY:
                polled = self.consumer.poll(timeout_ms=self.INTERVAL * 1000 /
                                            2)
                lastPolled = self.lastPolled
                start_time = self.sample_end_time - self.INTERVAL
                end_time = self.sample_end_time
                self.lastPolled = polled
                self.sample_end_time = self.get_end_time(time())
                if len(self.connections) != 0:
                    # run on lastPolled first to hit the cache
                    parsed_records = self.get_parsed_records(lastPolled) + \
                                     self.get_parsed_records(polled)
                    parsed_records = [
                        rec for rec in parsed_records
                        if rec["observation_date_time"] is not None
                    ]
                    ds_records = self.down_sample(parsed_records, start_time,
                                                  end_time)
                    messages = [rec["message"] for rec in ds_records]
                    for addr in self.connections.keys():
                        await self.send_or_ignore_message(addr, messages)
            await asyncio.sleep(0.1)

    def get_parsed_records(self, polled):
        # Create cache
        if "_get_parsed_records__polled" not in self.__dict__:
            self._get_parsed_records__polled = []
        if "_get_parsed_records__ret" not in self.__dict__:
            self._get_parsed_records__ret = []
        # Cache hit
        if self._get_parsed_records__polled == polled:
            return self._get_parsed_records__ret
        # Cache not hit
        self._get_parsed_records__polled = polled
        self._get_parsed_records__ret = []
        records = []
        for recordList in polled.values():
            records.extend([rec.value for rec in recordList])
        for rec in records:
            self._get_parsed_records__ret.append(self.parse_hl7(rec))
        return self._get_parsed_records__ret

    def parse_hl7(self, message):
        segments = message.decode(errors="ignore") \
                          .strip() \
                          .split(MESSAGE_SEGMENT_END_BYTE)
        location = None
        date_time = None
        observation_types = []
        observation_type = None
        for seg in segments:
            fields = seg.split('|')
            if fields[0] == "PV1":
                try:
                    location = fields[3]
                except IndexError:
                    pass
            if fields[0] == "OBR":
                try:
                    date_time = mktime(strptime(fields[7], "%Y%m%d%H%M%S"))
                except IndexError:
                    pass
            if fields[0] == "OBX":
                try:
                    observation_types.append(fields[13])
                except IndexError:
                    observation_types.append(None)
        observation_type_set = set(observation_types)
        if len(observation_type_set)==1 and \
            "APERIODIC" in observation_type_set:
            observation_type = "aperiodic"
        if len(observation_type_set)==1 and \
            None in observation_type_set:
            observation_type = "default"
        return {
            "assigned_patient_location": location,
            "observation_date_time": date_time,
            "observation_type": observation_type,
            "message": message
        }

    def down_sample(self, parsed_records, start_time, end_time):
        # Keep at most one record per (location, observation type) pair within
        # the sampling window, preferring the most recent record.
        records = []
        tmp = {}
        sorted_records = sorted(parsed_records,
                                key=lambda rec: rec["observation_date_time"])
        for rec in sorted_records:
            date_time = rec["observation_date_time"]
            location = rec["assigned_patient_location"]
            observation_type = rec["observation_type"]
            message = rec["message"]
            if date_time<start_time or \
                date_time>=end_time:
                continue
            tmp[location] = tmp.get(location, {})
            tmp[location][observation_type] = rec
        for d in tmp.values():
            for rec in d.values():
                records.append(rec)
        return records

    def log(self, msg):
        self.producer.send(
            self.log_topic,
            self.LOG_FORMAT.format(datetime.now().timestamp(), msg).encode())

    def get_end_time(self, current_time):
        interval = self.INTERVAL
        return current_time - current_time % interval + interval

    async def send_or_ignore_message(self, addr, messages):
        writer = self.connections[addr]
        try:
            for msg in messages:
                writer.write(msg)
                await writer.drain()
        except ConnectionResetError:
            # This error is not thrown reliably. If a connection is broken and
            # one tries a single writer.write(...) / await writer.drain(), it
            # may not manifest; it shows up more consistently when repeatedly
            # writing to and draining a broken connection.
            self.remove_client(addr)

    def refuse_client(self, addr, writer):
        self.log("{} refused".format(addr))
        writer.close()

    def add_client(self, addr, writer):
        if addr not in self.connections:
            self.log("{} accepted".format(addr))
            self.connections[addr] = writer
        else:
            self.remove_client(addr)
            self.add_client(addr, writer)

    def remove_client(self, addr):
        if addr in self.connections:
            self.log("{} closed".format(addr))
            writer = self.connections.pop(addr)
            try:
                writer.close()
            except ConnectionResetError:
                pass

    def cleanup(self):
        self.log("shutdown")
        for addr in list(self.connections.keys()):
            self.remove_client(addr)
        self.producer.flush()
        self.producer.close()
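# A minimal sketch of how the consumer above might be launched; the host, port
# and topic names are placeholders, not values from the original deployment.
if __name__ == "__main__":
    consumer = AIMSDownsamplingTCPServerConsumer(
        kafka_host="localhost", kafka_port=9092,
        tcp_host="0.0.0.0", tcp_port=9000,
        topic="hl7-raw", log_topic="hl7-downsampler-logs")
    try:
        consumer.run()
    finally:
        consumer.cleanup()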
Example #11
from kafka.producer import KafkaProducer

bootstrap_servers = ['localhost:9092']
topicName = 'myTopic'

producer = KafkaProducer(bootstrap_servers=bootstrap_servers)
ack = producer.send(topicName, b'Hello World!!!!!!!!')
metadata = ack.get()  # block until the broker acknowledges and returns RecordMetadata
print(metadata.topic)
print(metadata.partition)
Example #12
              encoding='utf8') as s3_taxi_data:
        read_taxi = csv.reader(s3_taxi_data, delimiter=',')
        next(read_taxi, None)
        for line in read_taxi:
            # data cleanup; to reduce the string length of taxi id
            key_string = line[1][:10]

            line_string = ''

            # skip rows with missing values in the first five columns
            if not all(line[:5]):
                continue
                continue
            for i in range(5):
                if i == 0 or i == 1:
                    line[i] = line[i][:10]
                # change the time format so that it is recognized by KSQL
                if i == 2 or i == 3:
                    line[i] = pd.to_datetime(line[i])
                    line[i] = line[i].strftime("%Y-%m-%d-%H:%M")
                if i == 4:
                    delimiter = ''
                else:
                    delimiter = ','
                line_string = line_string + ''.join(line[i]) + delimiter
            print(key_string)
            print(line_string)
            producer.send('topic_fatigue', value=line_string, key=key_string)
        # optional; to control ingestion rate
        # time.sleep(1)
Example #13
# Get observations from AoT
observations = client.list_observations(filters=f)

# Iterate through records
try:
    for page in observations:
        print(f'Page {page_num}')
        # data_stream = []
        for obs in page.data:
            ts = ciso8601.parse_datetime(obs["timestamp"])
            prev_record_timestamp = obs["timestamp"]
            data_stream = {
                'ts': int(time.mktime(ts.timetuple())),
                'node_id': obs["node_vsn"],
                'sensor_path': obs["sensor_path"],
                'value_hrf': obs["value"]
            }
            producer.send(topic, value=data_stream)

        # Block until all the messages have been sent
        producer.flush()
        page_num += 1

except (Exception, HTTPError) as error:
    print(error)
finally:
    # Write the latest processed timestamp to the state file
    with open("state.txt", "w+") as fh:
        fh.write(prev_record_timestamp)
    print(prev_record_timestamp)
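# The fragment above assumes an AoT client, a filter f, a page_num counter and
# a Kafka producer with a JSON value serializer (it sends a plain dict). A
# minimal sketch of the Kafka side of that setup; the broker and topic names
# are assumptions, and the AoT client/filter construction is omitted here:
import json
from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=lambda v: json.dumps(v).encode('utf-8'))
topic = 'aot-observations'
page_num = 1
prev_record_timestamp = ''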
Example #14
import time
import json
import boto3
from kafka.producer import KafkaProducer

if __name__ == '__main__':
    s3 = boto3.client('s3')
    producer = KafkaProducer(bootstrap_servers="127.0.0.1:9092")
    obj = s3.get_object(
        Bucket='nyctaxitrip',
        Key="{}/{}".format('yellow_trip',
                           'yellow_tripdata_sample.csv'))  # read s3 csv
    lines = obj['Body'].read().decode('utf-8')
    for line in lines.split("\n"):
        print(json.dumps(line))
        # keys and values must be bytes for the consumer to receive the stream
        producer.send("new_topic", value=line.encode('utf-8'), key=b'key')
        time.sleep(0.1)
Example #15
# coding=utf-8

import logging

from kafka.producer import KafkaProducer


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    producer = KafkaProducer(bootstrap_servers=["192.168.120.90:9092"])
    producer.send("wangybnet", b"Hello, World!")
Example #16
from kafka.producer import KafkaProducer

import ConfigParser  # Python 2 module; on Python 3 this is "configparser"
import socket

if __name__ == "__main__":

    config = ConfigParser.ConfigParser()
    config.read('configuration.cfg')

    urlKafkaProducer = config.get('StreamingProperties', 'URLKafkaProducer')
    topicName = config.get('StreamingProperties', 'TopicName')

    virtualMachine = 'local'
    if socket.gethostname() == 'ubuntu':
        virtualMachine = socket.gethostname()

    if virtualMachine == 'local':
        fileName = config.get('StreamingProperties', 'StreamingFileLocal')

    else:
        fileName = config.get('StreamingProperties', 'StreamingFileVirtual')

    producer = KafkaProducer(bootstrap_servers=urlKafkaProducer)

    with open(fileName, 'r') as infile:
        for line in infile:
            producer.send(topicName, line.encode('utf-8'))  # values must be bytes
            #time.sleep(0.000000001)

    producer.flush()  # flush buffered messages before the script exits
Example #17
class IBUSStreamingDownsamplingConsumer:
    LOG_FORMAT ="{} UTC_TS\t"\
                "{}"

    def __init__(self, kafkaHost, kafkaPort, tcpHost, tcpPort, group_id, topic,
                 logTopic, interval):
        self.kafkaHost = kafkaHost
        self.kafkaPort = kafkaPort
        self.tcpHost = tcpHost
        self.tcpPort = tcpPort
        self.group_id = group_id
        self.topic = topic
        self.logTopic = logTopic
        self.interval = int(interval)
        self.consumer = KafkaConsumer(
            topic,
            bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)],
            group_id=group_id,
            enable_auto_commit=False)
        self.producer = KafkaProducer(
            bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)])
        self.tcpWriter = None

    def getTopicPartitions(self):
        self.consumer.topics()  #This ensures local cache is updated with
        # information about partitions, offsets etc.
        pids = self.consumer.partitions_for_topic(self.topic)
        tps = [TopicPartition(self.topic, pid) for pid in pids]
        return tps

    def getTopicPartitionsCommittedPositions(self):
        tps = self.getTopicPartitions()
        ret = [(tp, self.consumer.committed(tp)) for tp in tps]
        return ret

    async def tcp_server_handler(self, reader, writer):
        addr = str(writer.get_extra_info("socket").getpeername())
        if self.tcpWriter is not None:
            self.log("refused " + addr)
            writer.write(b"Connection limit reached; connection refused.")
            writer.close()
            return
        self.log("accepted " + addr)
        self.tcpWriter = writer
        t1 = asyncio.create_task(self.poll_from_Kafka(writer))
        try:
            while True:
                data = await reader.read(1)  # read one byte at a time just to detect a closed connection
                if not data:
                    break
        except BrokenPipeError:
            # Catches "connection reset by peer" while we are sending batched
            # data, which is also when we cannot check the reader. A broken
            # connection on the writer side ultimately surfaces as a
            # BrokenPipeError on the reader side, so it is handled here.
            pass
        finally:
            t1.cancel()
            self.log("closed " + addr)
            writer.close()
            self.tcpWriter = None

    async def poll_from_Kafka(self, writer):
        while True:
            prevPos = self.getTopicPartitionsCommittedPositions()
            polled = self.consumer.poll(timeout_ms=1000)
            records = [
                record.value for recordList in polled.values()
                for record in recordList
            ]
            try:
                for record in records:
                    writer.write(record)
                    await writer.drain()
            except ConnectionResetError:
                # This error is not thrown reliably. If a connection is broken
                # and one tries a single writer.write(record) / await
                # writer.drain(), it may not manifest; it shows up more
                # consistently when repeatedly writing to and draining a
                # broken connection.
                print("Last batch not fully sent, not committed.")
                for tp, pos in prevPos:
                    self.consumer.seek(tp, pos)
                break
            else:
                self.consumer.commit()
            await asyncio.sleep(self.interval)

    def log(self, msg):
        self.producer.send(
            self.logTopic,
            self.LOG_FORMAT.format(datetime.now().timestamp(), msg).encode())

    def cleanup(self):
        self.log("shutdown")
        self.consumer.close()
        self.producer.flush()
        self.producer.close()

    def run(self):
        self.log("running")
        asyncio.run(self._async_run())

    async def _async_run(self):
        tcpServer = await asyncio.start_server(self.tcp_server_handler,
                                               self.tcpHost, self.tcpPort)
        await tcpServer.serve_forever()
Example #18
class GenerateData:
    # Fragment from a separate example: builds one synthetic paid-parking
    # transaction per call. self.increment, self.station_list(), pacific_time
    # and fmt are defined elsewhere in the original project.
    def run(self):
        DataId = self.increment
        MeterCode = random.choice(self.station_list())
        TransactionId = self.increment
        TransactionDateTime = datetime.now(pacific_time).strftime(fmt)
        Amount = random.choice([0.25, 0.5, 1, 1.5, 2, 3])
        PaymentMean = random.choice(['CREDIT CARD', 'PHONE', 'CASH'])
        max_duration_sec = 60 * 60 * 6
        PaidDuration = random.randint(1, max_duration_sec)
        ElementKey = MeterCode
        record = [
            DataId, MeterCode, TransactionId, TransactionDateTime, Amount, '',
            PaymentMean, PaidDuration, ElementKey, '2019', '4', ''
        ]
        data_send = ",".join(map(str, record))

        self.increment += 1

        print(data_send)

        key = str(MeterCode).encode()
        value = data_send.encode()
        return key, value


limit = 0
datagen = GenerateData()
while True:
    key, value = datagen.run()
    # send() takes (topic, value=..., key=...), so unpacking run()'s
    # (key, value) positionally would swap them; pass them by keyword.
    producer.send('paid-transaction', value=value, key=key)
    limit += 1
    time.sleep(.1)
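# The loop above relies on names defined elsewhere in the original script. A
# minimal sketch of that assumed setup; the broker address, timezone, timestamp
# format and station list are all assumptions:
import random
import time
from datetime import datetime

import pytz
from kafka import KafkaProducer

pacific_time = pytz.timezone('US/Pacific')
fmt = '%Y-%m-%d %H:%M:%S'
producer = KafkaProducer(bootstrap_servers='localhost:9092')

# GenerateData would additionally need, for example:
#     self.increment = 0                                   # set in __init__
#     def station_list(self): return ['10005', '10006']    # meter codes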
Example #19
from time import sleep
from kafka.producer import KafkaProducer
import json

producer = KafkaProducer(
    # set host and port that producer should contact to bootstrap initial cluster metadata
    bootstrap_servers=['localhost:9092'],
    # how data should be serialized before sending to broker (convert the data to a json file and encode it to utf-8)
    value_serializer=lambda x: json.dumps(x).encode('utf-8'))

for e in range(1000):
    data = {
        'number': e
    }  # key:value pairs to send (nb: this is not the topic key). Use a key for hashed-partitioning
    future = producer.send('numtest', value=data)

    # How to make sure the message is received by the broker?
    print(f'sent {data}')
    sleep(5)  # option 1: take a break

    # result = future.get(timeout=60)  # option 2: block until a single message is sent (or timeout)
    # option 3: block until all pending messages are at least put on the network. This does NOT
    # guarantee delivery or success! It is really only useful if you configure internal batching
    # using linger_ms.
    # producer.flush()
Example #20
import sys

import time
import json
import boto3
import lazyreader
#import helpers
from kafka.producer import KafkaProducer

producer = KafkaProducer(bootstrap_servers=['localhost:9092'])
while True:

    s3 = boto3.client('s3')
    obj = s3.get_object(Bucket='nyctaxi-trip-data',
                        Key="{}/{}".format('test_data', 'test1.txt'))

    for line in lazyreader.lazyread(obj['Body'], delimiter='\n'):

        #message_info = line.strip()
        #msg = helpers.map_schema(message_info, self.schema)
        #        data = {'number' : line}

        producer.send('read_s3', value=line)

        time.sleep(0.1)