class Tail2kafka(object):
    def __init__(self, host, port, topic, logfile):
        self.host = host
        self.port = port
        self.topic = topic
        self.logfile = logfile
        self.create_kafka_producer()

    def create_kafka_producer(self):
        kafka = KafkaClient(self.host + ":" + self.port)
        self.producer = SimpleProducer(kafka)

    def log_lines_generator(self):
        cmd = ['tail', '-n', '0', '-F', self.logfile]
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=None)
        while True:
            line = process.stdout.readline().strip()
            yield line

    def begin_to_tail(self):
        try:
            for line in self.log_lines_generator():
                self.producer.send_messages(self.topic, line)
        except KeyboardInterrupt:
            pass
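# A minimal usage sketch for the class above; the host, port, topic and log
# path are illustrative assumptions, not values from the original snippet.
if __name__ == '__main__':
    tailer = Tail2kafka('localhost', '9092', 'app-logs', '/var/log/app.log')
    tailer.begin_to_tail()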
class DmsKafkaClient(object):
    def __init__(self):
        config = ServiceContext().getConfigService()
        broker_list = config.get("Message", "kafka_producer")
        kafka = KafkaClient(broker_list)
        self.producer = SimpleProducer(kafka)
        self.zabbix_alert = config.get("Message", "zabbix_alert_topic")

    def sendPackageTimeout(self, accountId):
        message = {
            "accountId": accountId,
            "host": None,
            "item": None,
            "severity": "ERROR",
            "description": "account %s workflow timeout" % accountId
        }
        all = {
            "timestamp": 1L,
            "src": "rundeck",
            "host_ip": "10.74.113.101",
            "rawdata": json.dumps(message)
        }
        schema = avro.schema.parse(avro_schema)
        writer = avro.io.DatumWriter(schema)
        bytes_writer = io.BytesIO()
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write(all, encoder)
        try:
            self.producer.send_messages(b"%s" % self.zabbix_alert, bytes_writer.getvalue())
            logger.info("send to zabbix sa successfully")
        except:
            logger.error(
                "occur error when send package timeout message to zabbix alert topic"
            )
def on_get(self, req, res):
    global connection, current_table
    # connection = happybase.Connection(config.get('default', 'HBASE_HOST'))
    connection.open()
    current_table = connection.table('userscore')
    req_data = {
        'pclass': req.get_param('pclass'),
        'age': req.get_param('age'),
        'sex': req.get_param('sex'),
        'fare': req.get_param('fare')
    }
    #score = req.get_param('score') or 0
    #print '==============', candidate_id
    result = current_table.row(req_data['fare'])
    if not result:
        #request_json = { "name": candidate_id, "score": score }
        producer = SimpleProducer(kafka)
        producer.send_messages('userscore', json.dumps(req_data))
        n = 3
        while n >= 0:
            result = current_table.row(req_data['fare'])
            if not result:
                time.sleep(1)
                n -= 1
            else:
                break
    connection.close()
    res.body = json.dumps(result)
def trigger():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)
    r = requests.get(
        "http://fintech.dataapplab.com:33334/api/v1.0/FinTech/streamingdata")
    print r
    producer.send_messages('fintech-lendingclub', r.content)
    data = {
        "bc_open_to_buy": 0,
        "total_il_high_credit_limit": 0,
        "dti": 2009,
        "annual_inc": 12000,
        "bc_util": 0,
        "int_rate": 10.08,
        "term": 36,
        "loan_amnt": 3500,
        "fund_rate": 0,
        "funded_amnt": 3500,
    }
    #for line in r.iter_lines():
    producer.send_messages('fintech-lendingclub', json.dumps(data))
    return "success"
    #print type(line)
    kafka.close()
def create_note(request):
    if request.method != 'POST':
        return _error_response(request, "must make POST request")
    if 'authenticator' not in request.POST or 'title' not in request.POST or 'details' not in request.POST:
        return _error_response(request, "missing fields")
    values = {
        "authenticator": request.POST['authenticator'],
        "title": request.POST['title'],
        "details": request.POST['details']
    }
    data = urllib.parse.urlencode(values).encode('utf-8')
    req = urllib.request.Request('http://models:8000/api/v1/note/create', data=data, method='POST')
    resp_json = urllib.request.urlopen(req).read().decode('utf-8')
    resp = json.loads(resp_json)
    if resp["ok"] is True:
        kafka = KafkaClient('kafka:9092')
        producer = SimpleProducer(kafka)
        note_new_listing = {
            "title": request.POST['title'],
            "details": request.POST['details'],
            "id": resp["resp"]["id"]
        }
        producer.send_messages(b'note-listings-topic', json.dumps(note_new_listing).encode('utf-8'))
        # es_add is a temporary helper function adding the listing to ES directly without going through kafka
        es_add_note_listing(request, resp["resp"]["id"], resp["resp"]["username"])
        return _success_response(request, resp["resp"])
    else:
        return _error_response(request, resp["error"])
class WeatherProducer():
    def __init__(self):
        self.api_url = 'http://api.openweathermap.org/data/2.5/weather/'
        self.kafka = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(self.kafka)

    def get_curr_weather(self):
        self.params = {'q': 'new york', 'appid': API_KEY}
        r = requests.get(url=self.api_url, params=self.params)
        data = r.json()
        timestamp = datetime.datetime.now()
        data['timestamp'] = timestamp
        return data

    def myconverter(self, o):
        if isinstance(o, datetime.datetime):
            return o.__str__()

    def send_to_kafka(self):
        data = self.get_curr_weather()
        try:
            self.producer.send_messages('weather',
                                        json.dumps(data, default=self.myconverter).encode('utf-8'))
            print("Successfully sent to kafka")
        except BaseException as e:
            print("Error on_data %s" % str(e))
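# A minimal driver sketch for the class above; the __main__ guard and the
# 60-second poll interval are assumptions for illustration, not part of the
# original snippet.
import time

if __name__ == '__main__':
    weather_producer = WeatherProducer()
    while True:
        weather_producer.send_to_kafka()
        time.sleep(60)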
class TweeterStreamListener(tweepy.StreamListener):
    """ A class to read the twitter stream and push it to Kafka"""

    def __init__(self, api):
        self.api = api
        super(tweepy.StreamListener, self).__init__()
        client = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(client, async=True,
                                       batch_send_every_n=1000,
                                       batch_send_every_t=10)

    def on_status(self, status):
        """ This method is called whenever new data arrives from live stream.
        We asynchronously push this data to kafka queue"""
        msg = status.text.encode('utf-8')
        #print(msg)
        try:
            self.producer.send_messages(b'twitterstream', msg)
        except Exception as e:
            print(e)
            return False
        return True

    def on_error(self, status_code):
        print(status_code)
        print("Error received in kafka producer")
        return True  # Don't kill the stream

    def on_timeout(self):
        return True  # Don't kill the stream
class TweeterStreamProducer():
    """ A class to read the tweet stream and push it to Kafka"""

    def __init__(self):
        client = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(client, async_send=True,
                                       batch_send_every_n=1000,
                                       batch_send_every_t=10)

    def on_status(self, status):
        """ This method is called whenever new data arrives from live stream.
        We asynchronously push this data to kafka queue"""
        msg = status
        #print(msg)
        try:
            self.producer.send_messages('twitterstream', msg.encode('utf-8'))
        except Exception as e:
            print(e)
            return False
        return True

    def on_error(self, status_code):
        print("Error received in kafka producer")
        return True  # Don't kill the stream

    def on_timeout(self):
        return True  # Don't kill the stream
def test_simple_producer(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    producer = SimpleProducer(self.client, random_start=False)

    # Goes to first partition, randomly.
    resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
    self.assert_produce_response(resp, start_offsets[0])

    # Goes to the next partition, randomly.
    resp = producer.send_messages(self.topic, self.msg("three"))
    self.assert_produce_response(resp, start_offsets[1])

    self.assert_fetch_offset(partitions[0], start_offsets[0],
                             [self.msg("one"), self.msg("two")])
    self.assert_fetch_offset(partitions[1], start_offsets[1], [self.msg("three")])

    # Goes back to the first partition because there's only two partitions
    resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
    self.assert_produce_response(resp, start_offsets[0] + 2)
    self.assert_fetch_offset(partitions[0], start_offsets[0], [
        self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five")
    ])

    producer.stop()
def send_kafka():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)
    while True:
        producer.send_messages("data", b'data data data')
        producer.send_messages("weights", b'8.46,1.74,6.08,4.25,1.92')
def get_files():
    kafka = KafkaClient("129.16.125.231:9092")
    producer = SimpleProducer(kafka)
    topic = 'test'
    for root, dirs, files in os.walk(
            '/mnt/volume/fromAl/Data_20151215 HepG2 LNP size exp live cell 24h_20151215_110422/AssayPlate_NUNC_#165305-1/'
    ):
        if not files:
            print("files is empty")
        else:
            print("In else")
            print("root: ", root)
            print("dirs: ", dirs)
            print("files[0]: ", files[0])
        if not dirs:
            print("dirs is empty")
        # else:
        print(
            '/mnt/volume/fromAl/Data_20151215 HepG2 LNP size exp live cell '
            '24h_20151215_110422/AssayPlate_NUNC_#165305-1/' + files[0])
        for i in range(len(files)):
            img = cv2.imread(
                '/mnt/volume/fromAl/Data_20151215 HepG2 LNP size exp live cell '
                '24h_20151215_110422/AssayPlate_NUNC_#165305-1/' + files[i])
            ret, jpeg = cv2.imencode('.png', img)
            producer.send_messages(topic, jpeg.tobytes())
    kafka.close()
class KafkaMessageAdapterPreHourly(MessageAdapter):

    adapter_impl = None

    def __init__(self):
        client_for_writing = KafkaClient(cfg.CONF.messaging.brokers)
        self.producer = SimpleProducer(client_for_writing)
        self.topic = cfg.CONF.messaging.topic_pre_hourly

    @staticmethod
    def init():
        # object to keep track of offsets
        KafkaMessageAdapterPreHourly.adapter_impl = simport.load(
            cfg.CONF.messaging.adapter_pre_hourly)()

    def do_send_metric(self, metric):
        self.producer.send_messages(
            self.topic,
            json.dumps(metric, separators=(',', ':')))
        return

    @staticmethod
    def send_metric(metric):
        if not KafkaMessageAdapterPreHourly.adapter_impl:
            KafkaMessageAdapterPreHourly.init()
        KafkaMessageAdapterPreHourly.adapter_impl.do_send_metric(metric)
def fetchFrom():
    in_kafka = KafkaClient(IN_KAFKA_HOST)
    consumer = SimpleConsumer(in_kafka, 'trending', CONSUMER_TOPIC,
                              max_buffer_size=20*1024*1024)
    out_kafka = KafkaClient(OUT_KAFKA_HOST)
    producer = SimpleProducer(out_kafka)

    for msg in consumer:
        record = json.loads(msg.message.value)
        if 'tags' in record and '_trends' in record['tags']:
            try:
                producer.send_messages("trends", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
            continue
        if 'metadata' in record:
            print record['metadata']
        if 'metadata' in record and 'tags' in record['metadata'] and '_channels' in record['metadata']['tags']:
            try:
                producer.send_messages("channels", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
            continue

    in_kafka.close()
    out_kafka.close()
def report(timestamp, vin, data):
    """ Log the location record """
    conf = get_settings()
    kafka = None
    logger.info('Kafka MQ Server: Report Request: Time: %s, VIN: %s, Data: %s.',
                timestamp, vin, data)
    payload = {}
    payload['timestamp'] = timestamp
    payload['vin'] = vin
    payload['data'] = data

    # Connect to Kafka Message Queue Server
    try:
        kafka = KafkaClient(conf['TRACKING_MQ_URL'])
    except:
        logger.error("%s: Kafka Message Queue Server unavailable:", conf['TRACKING_MQ_URL'])
        kafka = None
        return False

    producer = SimpleProducer(kafka)
    producer.send_messages(conf['TRACKING_MQ_TOPIC'], json.dumps(payload))
    logger.info("%s: Report data published to message queue.", conf['TRACKING_MQ_URL'])
    return True
def asynchronous_mode():
    '''Asynchronous Mode'''
    from kafka import SimpleProducer, KafkaClient
    import logging

    # To send messages asynchronously
    kafka = KafkaClient(KAFKA_SERVER)
    producer = SimpleProducer(kafka, async=True)
    producer.send_messages(b'topic1', b'async message')

    # To wait for acknowledgements
    # ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
    #                         a local log before sending response
    # ACK_AFTER_CLUSTER_COMMIT : server will block until the message is committed
    #                            by all in sync replicas before sending a response
    producer = SimpleProducer(kafka, async=False,
                              req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
                              ack_timeout=2000,
                              sync_fail_on_error=False)
    responses = producer.send_messages(b'topic1', b'another message')
    for response in responses:
        logging.info(response.offset)

    # To send messages in batch. You can use any of the available
    # producers for doing this. The following producer will collect
    # messages in batch and send them to Kafka after 20 messages are
    # collected or every 60 seconds
    # Notes:
    # * If the producer dies before the messages are sent, there will be losses
    # * Call producer.stop() to send the messages and cleanup
    producer = SimpleProducer(kafka, async=True,
                              batch_send_every_n=20,
                              batch_send_every_t=60)
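# As the notes above say, batched/async producers buffer messages in a background
# thread and stop() flushes anything still queued. A minimal cleanup sketch,
# reusing the `producer` and `kafka` names from the function body above:
producer.send_messages(b'topic1', b'batched message one', b'batched message two')
producer.stop()   # flush the pending batch and stop the async sender thread
kafka.close()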
def test_kafka(request):
    KAFKA_BROKER_URL = "127.0.0.1:9092"
    KAFKA_TIMEOUT = 5
    kafka_client = KafkaClient(KAFKA_BROKER_URL, timeout=KAFKA_TIMEOUT)
    kafka_producer = SimpleProducer(kafka_client)
    kafka_producer.send_messages("dummy_empty", "")
    return HttpResponse('', content_type="application/json")
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(self.kafka_client,
                                             batch_send=True,
                                             batch_send_every_n=500,
                                             batch_send_every_t=30)

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
def populate(self, auth_info, stmt_data, payload_sha2s):
    if self.__class__.__name__ == 'StatementManager':
        stmt_data['voided'] = False
    self.build_verb(stmt_data)
    self.build_statement_object(auth_info, stmt_data)
    stmt_data['actor'] = Agent.objects.retrieve_or_create(**stmt_data['actor'])[0]
    self.build_context(stmt_data)
    self.build_result(stmt_data)
    # Substatement could not have timestamp
    if 'timestamp' in stmt_data:
        stmt_data['timestamp'] = convert_to_datetime_object(stmt_data['timestamp'])
    attachment_data = stmt_data.pop('attachments', None)
    if self.__class__.__name__ == 'StatementManager':
        # Save statement/substatement
        self.model_object = self.build_statement(auth_info, stmt_data)
    else:
        self.model_object = self.build_substatement(auth_info, stmt_data)

    try:
        kafka = SimpleClient(KAFKA_HOST)
        producer = SimpleProducer(kafka)
        producer.send_messages("StatementManager", str(self.model_object))
        kafka.close()
    except:
        print('error with StatementManager Kafka')

    if attachment_data:
        self.build_attachments(auth_info, attachment_data, payload_sha2s)
def main():
    ## Pass the kafka_url, e.g. `192.168.1.110:9092`
    kafka_url = sys.argv[1]

    ## Register to read messages from the "rousseau" list
    consumer = KafkaConsumer('rousseau',
                             group_id='my_group',
                             bootstrap_servers=[kafka_url])

    ## Register to send to the rousseau-chain channel
    kafka = KafkaClient(kafka_url)
    producer = SimpleProducer(kafka)

    # Initialize a chain backed by 2 disk files
    c = chain(diskHashList("fentries.dat"), diskHashList("fnodes.dat"))

    ## The main event loop
    for message in consumer:
        # message value is raw byte string -- decode if necessary!
        # e.g., for unicode: `message.value.decode('utf-8')`
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value))

        seq = c.add(message.value)
        response = "%s|%s|%s" % (seq, hexlify(c.head()), message.value)
        print(response)

        # Note that the application is responsible for encoding messages to type bytes
        producer.send_messages(b'rousseau-chain', response)
def get_weather(sc):
    # To send messages synchronously
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)
    countriesArray = ["Singapore", "Chicago", "Madrid", "Beijing"]
    for country in countriesArray:
        # Call Weather API to get forecasts
        response = urllib2.urlopen(
            'http://api.openweathermap.org/data/2.5/weather?q=' + country +
            '&appid=' + WEATHER_API_APPID)
        data = json.load(response)
        countryDataDict = {}
        countryDataDict["city"] = data["name"]
        countryDataDict["country"] = data["sys"]["country"]
        countryDataDict["timestamp"] = data["dt"]
        countryDataDict["wind_speed"] = data["wind"]["speed"]
        countryDataDict["visibility"] = data["visibility"]
        countryDataDict["weather"] = data["weather"]
        countryDataDict["main"] = data["main"]
        # Need to convert dict to bytes before sending to kafka
        bytesData = json.dumps(countryDataDict)
        producer.send_messages(
            b'weather',
            b'Weather data for ' + country + ' at ' + str(data["dt"]))
        producer.send_messages(b'weather', bytesData)
        print("Weather data for " + country + " sent to Kafka..")
    sc.enter(300, 1, get_weather, (sc,))
def takePicture():
    imagePath = '/tmp/image.jpg'
    try:
        os.remove(imagePath)
    except OSError:
        pass
    subprocess.call("chdkptp -ec -e\"rec\" -e\"rs %s\"" % (imagePath[:-4]), shell=True)
    if not os.path.isfile(imagePath):
        logging.warn("Error during taking picture")
        return
    with open(imagePath, "rb") as imageFile:
        imageEncoded = base64.b64encode(imageFile.read())
    upload = {
        'id': str(uuid.uuid4()),
        'picture': imageEncoded,
        'takenTime': int(time.time()),
        'ride': 'cam2',
    }
    data = json.dumps(upload)
    logging.info("Message size %d" % len(data))
    kafka = KafkaClient(kafka_server)
    producer = SimpleProducer(kafka)
    producer.send_messages(b'pictures', data)
def get(address=u'', lat=0, lon=0, radius=0):
    # streetAddress = '1600 Pennsylvania Ave, Washington, DC'
    client_id = '8728ec7ee9424eb4aae9d45107ee6481'
    resolvedAddress = lambda: None
    if len(address) > 0:
        resolvedAddress.__dict__ = getAddressLatLon(address)
        lat = float(resolvedAddress.lat)
        lon = float(resolvedAddress.lon)
        radius = 1000
    now = datetime.now()
    sixHoursEarlier = now - timedelta(hours=1)
    instagramReturnTuple = doInstagramMagic(resolvedAddress, client_id,
                                            sixHoursEarlier, now, lat, lon, radius)
    instagramJson = instagramReturnTuple[0]
    resolvedAddress.imageCount = instagramReturnTuple[1]
    topic = 'qpr.geogram'
    host = 'k01.istresearch.com'
    port = 9092
    kafka = KafkaClient("%s:%i" % (host, port))
    producer = SimpleProducer(kafka)
    message = json.dumps({"message": instagramJson})
    producer.send_messages(topic, message)
    return json.dumps(resolvedAddress.__dict__)
def fetchFrom():
    in_kafka = KafkaClient('172.31.10.154:9092')
    consumer = SimpleConsumer(in_kafka, 'fetcher', 'cpp.pages',
                              max_buffer_size=20*1024*1024)
    out_kafka = KafkaClient("172.31.1.70:9092")
    producer = SimpleProducer(out_kafka)

    for msg in consumer:
        page = json.loads(msg.message.value)
        if 'retweet' in page['meta']:
            print "remove twitter page"
            continue
        output = {}
        output['inlink'] = ''
        output['level'] = 1
        output['url'] = page['url']
        output['fts'] = page['ts_fetch']
        output['content'] = page['content']
        try:
            producer.send_messages("process", json.dumps(output))
            print(str(time.time()) + " pump url " + output['url'].encode('utf-8'))
        except MessageSizeTooLargeError as err:
            logging.warning(err)

    in_kafka.close()
    out_kafka.close()
def genData(topic):
    producer = SimpleProducer(kafka, async=True)
    with open(source_file) as f:
        for line in f:
            print line
            jd = json.dumps(line)
            producer.send_messages(topic, line.encode('utf-8'))
def video_emitter(video_file, topic, producer_port=9092):
    # Open the video
    assert os.path.isfile(video_file), "Video does not exist"
    # Create a producer
    kafka = SimpleClient("localhost:%d" % producer_port)
    producer = SimpleProducer(kafka)
    logger.info("Kafka producer created")

    video = cv2.VideoCapture(video_file)
    logger.info("Emitting...")
    cnt = 1  # Count the frames
    while video.isOpened():
        success, image = video.read()
        if not success:
            # check if the file has been read to the end
            break
        ret, jpeg = cv2.imencode(".png", image)
        logger.info("Successfully read one video frame as png. Frame count = %d" % cnt)
        # Convert the image to bytes and send to kafka
        producer.send_messages(topic, jpeg.tobytes())
        time.sleep(0.2)  # To reduce CPU usage
        logger.info("Successfully sent the video frame to the producer.")
        cnt += 1
    video.release()
    logger.info("Finished with emitting")
class listener(tweepy.StreamListener):
    def __init__(self):
        client = KafkaClient("localhost:9092")
        try:
            self.producer = SimpleProducer(client, async=True,
                                           batch_send_every_n=1000,
                                           batch_send_every_t=10)
            print 'Initialised'
        except Exception as e:
            print 'failed:', str(e)

    def on_data(self, data):
        #try:
        try:
            jsondata = json.loads(data)
            print jsondata
            self.producer.send_messages('trumpstream', str(data))
            db.trumpdb.insert(jsondata)
            return True
        except TypeError as e:
            print 'TypeError:', str(e)
            time.sleep(5)
            pass

    def on_error(self, status):
        print status
def test_simple_producer(self):
    start_offset0 = self.current_offset(self.topic, 0)
    start_offset1 = self.current_offset(self.topic, 1)
    producer = SimpleProducer(self.client)

    # Goes to first partition, randomly.
    resp = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
    self.assert_produce_response(resp, start_offset0)

    # Goes to the next partition, randomly.
    resp = producer.send_messages(self.topic, self.msg("three"))
    self.assert_produce_response(resp, start_offset1)

    self.assert_fetch_offset(0, start_offset0, [self.msg("one"), self.msg("two")])
    self.assert_fetch_offset(1, start_offset1, [self.msg("three")])

    # Goes back to the first partition because there's only two partitions
    resp = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))
    self.assert_produce_response(resp, start_offset0 + 2)
    self.assert_fetch_offset(0, start_offset0, [
        self.msg("one"), self.msg("two"), self.msg("four"), self.msg("five")
    ])

    producer.stop()
def test_producer_sync_fail_on_error(self):
    error = FailedPayloadsError('failure')
    with patch.object(SimpleClient, 'load_metadata_for_topics'):
        with patch.object(SimpleClient, 'ensure_topic_exists'):
            with patch.object(SimpleClient, 'get_partition_ids_for_topic', return_value=[0, 1]):
                with patch.object(SimpleClient, '_send_broker_aware_request', return_value=[error]):

                    client = SimpleClient(MagicMock())
                    producer = SimpleProducer(client, async_send=False, sync_fail_on_error=False)

                    # This should not raise
                    (response,) = producer.send_messages('foobar', b'test message')
                    self.assertEqual(response, error)

                    producer = SimpleProducer(client, async_send=False, sync_fail_on_error=True)
                    with self.assertRaises(FailedPayloadsError):
                        producer.send_messages('foobar', b'test message')
def send_json(data):
    kafka = KafkaClient('localhost:9092')  # Connect to Kafka and send json
    producer = SimpleProducer(kafka)
    producer.send_messages(topic, data)
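# Illustrative call of send_json above, assuming `topic` is a module-level topic
# name defined elsewhere (it is not set inside the function) and that the caller
# serializes its own payload:
import json
send_json(json.dumps({"sensor": "s1", "value": 42}))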
def sendTweets(file, topic):
    data = pd.read_csv(file)  # reads a csv file of tweets
    df = pd.DataFrame(data)
    df_ids = df['id']  # defines a data frame object of only the row in the csv with tweet ids
    for tweetid in df_ids:
        try:
            tweet_id = str(tweetid)
            kafka = KafkaClient("localhost:9092")  # initializes kafka client
            producer = SimpleProducer(kafka, value_serializer=('utf-8'))  # initializes kafka producer
            producer.send_messages(
                (str(topic)), tweet_id.encode('utf-8')
            )  # sends tweet ids for the given topic to a message queue
            print('Sending Tweet to Mempool!')
            print('Received at Mempool!')
            time.sleep(2)
        except tweepy.TweepError:
            time.sleep(60 * 15)
            print('this messed up')
            continue
    print("Tweets Delivered to Mempool")
class StdOutListener(StreamListener):
    def __init__(self):
        self.kafka = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(self.kafka)
        self.api_url = 'http://api.openweathermap.org/data/2.5/weather/'

    def get_curr_weather(self):
        self.params = {'q': 'new york', 'appid': WEATHER_KEY}
        r = requests.get(url=self.api_url, params=self.params)
        data = r.json()
        return data

    def on_data(self, data):
        try:
            curr_weather = self.get_curr_weather()
            curr_tweet = json.loads(data)
            message = {}
            message.update(curr_weather)
            message.update(curr_tweet)
            data = json.dumps(message)
            self.producer.send_messages('tweet-weather', data.encode('utf-8'))
            print("Successfully sent message to kafka")
        except BaseException as e:
            print("Error on_data %s" % str(e))
        return True

    def on_error(self, status):
        print(status)
async def getAccounts(base_topic):
    async for account in base_topic:
        print(account)
        t_account_topic = account['handle']
        kafka = KafkaClient("localhost:9092")  # initializes kafka client
        producer = SimpleProducer(kafka, value_serializer=('utf-8'))  # initializes kafka producer
        producer.send_messages('IndividualAccounts', t_account_topic.encode('utf-8'))
        account_topic = app.topic(str(t_account_topic), value_serializer='json')
        account_topic.stream()
        print('New Stream Made')
        topic = account['handle']
        chain_id = str(account['chainid'])
        twitter_id = str(account['twitterid'])
        Stream_Listener = StreamListener()  # Turns Stream Listener Class On
        Stream_Listener.field_load(twitter_id, chain_id, topic)
        try:
            api = getTwitterCredentials(TWITTER_KEY, TWITTER_SECRET,
                                        TWITTER_APP_KEY, TWITTER_APP_SECRET)  # authorize api credentials
            stream = tweepy.Stream(auth=api.auth, listener=Stream_Listener,
                                   aync=True)  # create a stream for the account
            stream.filter(
                follow=[str(twitter_id)], is_async=True
            )  # listens to the twitter account and triggers for only the account's tweets
        except Exception as ex:
            # error handling to restart the streamer in the event of it stopping
            # for things like a Rate Limit Error
            print("[STREAM] Stream stopped! Reconnecting to twitter stream")
            print(ex)
            stream.filter(follow=[str(twitter_id)])
        new_file = tweetFetcher(topic)  # creates a csv file of the account's most recent tweets and filters it
        cwd = os.getcwd()
        for file in os.listdir(cwd):
            if file.startswith(topic):
                sendTweets(file, str(topic))  # writes the tweets from the csv to the message queue
        print('Done')
        print('waiting to Factomize')
        consumer = KafkaConsumer(str(topic), auto_offset_reset='earliest',
                                 bootstrap_servers=['localhost:9092'],
                                 consumer_timeout_ms=1000)
        for message in consumer:
            raw_tweetid = message.value
            tweetid = raw_tweetid.decode("utf-8")
            factomizeTweets((tweetid), (chain_id))  # Write tweet to Factom
            print(str(tweetid) + ' Success!')
            time.sleep(10)
        consumer.close()
class TwitterStreamListener(StreamListener):
    def __init__(self, api=None):
        # connect to the kafka broker
        # need to handle error
        self.topic = "tweet"
        self.kafka = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(self.kafka)

    def on_data(self, data):
        if 'in_reply_to_status' in data:
            self.on_status(data)
        return True

    def on_status(self, data):
        tweet = json.loads(data)
        text = tweet.get('text', ' ')
        coord = tweet.get('coordinates', None)
        created_at = tweet.get('created_at', " ")
        id = tweet.get('id', ' ')
        lang = tweet.get('lang', ' ')
        user = tweet.get('user', "user")
        timestamp = tweet.get('timestamp_ms', " ")
        timestamp = arrow.get(timestamp)
        text = re.sub(r'\W+', ' ', text)
        lon, lat = "", ""
        print tweet.keys()
        if coord:
            lon = coord['coordinates'][0]
            lat = coord['coordinates'][1]
        tweet_csv = "{id}, {created_at}, {timestamp},{lang}, {lon}, {lat},{text},0".format(
            id=id, created_at=created_at, timestamp=timestamp, lang=lang,
            lon=lon, lat=lat, text=text)
        if lang == 'en':
            print tweet_csv
            self.producer.send_messages(self.topic, tweet_csv)
        else:
            print "not english"
            print tweet_csv
        return

    def on_limit(self, track):
        sys.stderr.write(track + "\n")
        return

    def on_error(self, status_code):
        sys.stderr.write('Error: ' + str(status_code) + "\n")
        return False

    def on_timeout(self):
        sys.stderr.write("Timeout, sleeping for 60 seconds...\n")
        time.sleep(60)
        return
def test_produce__new_topic_fails_with_reasonable_error(self):
    new_topic = 'new_topic_{guid}'.format(guid=str(uuid.uuid4())).encode('utf-8')
    producer = SimpleProducer(self.client, random_start=False)

    # At first it doesn't exist
    with self.assertRaises((UnknownTopicOrPartitionError, LeaderNotAvailableError)):
        producer.send_messages(new_topic, self.msg("one"))
def test_producer_random_order(self):
    producer = SimpleProducer(self.client, random_start=True)
    resp1 = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
    resp2 = producer.send_messages(self.topic, self.msg("three"))
    resp3 = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))

    self.assertEqual(resp1[0].partition, resp3[0].partition)
    self.assertNotEqual(resp1[0].partition, resp2[0].partition)
def send_kafka_msg(iters):
    # TODO: Add try/catch statements for kafka connection
    kafka = KafkaClient(kafka_host)
    producer = SimpleProducer(kafka)
    for key, val in iters:
        msg = combine_count_json(key, val)
        producer.send_messages(str(topic).encode("utf-8"), str(msg).encode("utf-8"))
    kafka.close()
def genData(self):
    with open(self.source_file) as f:
        reader = csv.DictReader(f)
        crimeLocations = list(reader)

    kafka_cluster = self.config['kafka_cluster']
    print "kafka_cluster is:" + kafka_cluster + " done";
    kafka_client = KafkaClient(kafka_cluster)
    kafka_producer = SimpleProducer(kafka_client)

    # To send messages synchronously
    # kafkaSimple = KafkaClient('52.10.17.219:9092')
    # producerSimple = SimpleProducer(kafkaSimple, async=True)
    # geolocator = Nominatim()

    count = 0
    while True:
    #while (count < 5):
        for loc in crimeLocations:
            userID = loc["userID"]
            userName = loc["userName"]
            '''
            #date_rptd = loc["date_rptd"]
            date_rptd = str(datetime.datetime.now().month) + "/" + str(datetime.datetime.now().day) + "/" + str(datetime.datetime.now().year);
            #time_rptd = loc["time_rptd"]
            time_rptd = str(datetime.datetime.now().hour).zfill(2) + str(datetime.datetime.now().minute).zfill(2);
            #dateTemp = datetime.datetime.strptime(date_rptd_raw, '%m/%d/%y').strftime('%Y-%m-%d')
            locationObj = "";
            #timestamp
            '''
            latitude = float(loc['latitude'])
            longitude = float(loc['longitude'])

            msg = {}
            msg['userID'] = userID
            msg['userName'] = userName
            location = {
                'latitude': latitude,
                'longitude': longitude
            }
            msg['location'] = location

            #time.sleep(10);
            kafka_producer.send_messages(self.topic, json.dumps(msg))
            #time.sleep(10);
            #producerSimple.send_messages(self.topic, json.dumps(msg))
            #producerSimple.send_messages('crimeLocation1', 'tajmessage1');

            print "sending location update for user %s" % userID

        count += 1
        print "+++++++++++++FINISH ROUND %d+++++++++++++++++" % count
def timeline_producer(twitter_account, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.user_timeline(twitter_account, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
class KafkaProducer:
    def __init__(self):
        kafkahandle = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(kafkahandle)

    def kafka_producer(self, topicname='harish_t', message=time.time()):
        # Note: the default `message=time.time()` is evaluated once, at function
        # definition time, not on every call.
        _msg = str(message)  # Converting to string explicitly since kafka expects string
        self.producer.send_messages(topicname, _msg)
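# Illustrative use of the wrapper class above; the payload string is an
# assumption, the topic name is the snippet's own default.
kp = KafkaProducer()
kp.kafka_producer(topicname='harish_t', message='hello from the wrapper')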
def test_batched_simple_producer__triggers_by_message(self):
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    # Configure batch producer
    batch_messages = 5
    batch_interval = 5
    producer = SimpleProducer(
        self.client,
        batch_send=True,
        batch_send_every_n=batch_messages,
        batch_send_every_t=batch_interval,
        random_start=False)

    # Send 4 messages -- should not trigger a batch
    resp = producer.send_messages(
        self.topic,
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # It hasn't sent yet
    self.assert_fetch_offset(partitions[0], start_offsets[0], [])
    self.assert_fetch_offset(partitions[1], start_offsets[1], [])

    # send 3 more messages -- should trigger batch on first 5
    resp = producer.send_messages(
        self.topic,
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # send messages groups all *msgs in a single call to the same partition
    # so we should see all messages from the first call in one partition
    self.assert_fetch_offset(partitions[0], start_offsets[0], [
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    ])

    # Because we are batching every 5 messages, we should only see one
    self.assert_fetch_offset(partitions[1], start_offsets[1], [
        self.msg("five"),
    ])

    producer.stop()
def query_location_producer(lat, lng, radius, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.area_search(lat, lng, radius, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
def test_batched_simple_producer__triggers_by_time(self):
    self.skipTest("Flakey test -- should be refactored or removed")
    partitions = self.client.get_partition_ids_for_topic(self.topic)
    start_offsets = [self.current_offset(self.topic, p) for p in partitions]

    batch_interval = 5
    producer = SimpleProducer(
        self.client,
        async_send=True,
        batch_send_every_n=100,
        batch_send_every_t=batch_interval,
        random_start=False)

    # Send 5 messages and do a fetch
    resp = producer.send_messages(
        self.topic,
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # It hasn't sent yet
    self.assert_fetch_offset(partitions[0], start_offsets[0], [])
    self.assert_fetch_offset(partitions[1], start_offsets[1], [])

    resp = producer.send_messages(
        self.topic,
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    )

    # Batch mode is async. No ack
    self.assertEqual(len(resp), 0)

    # Wait the timeout out
    time.sleep(batch_interval)

    self.assert_fetch_offset(partitions[0], start_offsets[0], [
        self.msg("one"),
        self.msg("two"),
        self.msg("three"),
        self.msg("four"),
    ])

    self.assert_fetch_offset(partitions[1], start_offsets[1], [
        self.msg("five"),
        self.msg("six"),
        self.msg("seven"),
    ])

    producer.stop()
def query_text_producer(text, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.search(text, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
class KafkaSender(LogSender):
    def __init__(self, config, msg_buffer, stats):
        LogSender.__init__(self, config=config, msg_buffer=msg_buffer, stats=stats,
                           max_send_interval=config.get("max_send_interval", 0.3))
        self.config = config
        self.msg_buffer = msg_buffer
        self.stats = stats
        self.kafka = None
        self.kafka_producer = None

        if not isinstance(self.config["kafka_topic"], bytes):
            topic = self.config["kafka_topic"].encode("utf8")
        self.topic = topic

    def _init_kafka(self):
        self.log.info("Initializing Kafka client, address: %r", self.config["kafka_address"])
        while self.running:
            try:
                if self.kafka_producer:
                    self.kafka_producer.stop()
                if self.kafka:
                    self.kafka.close()

                self.kafka = KafkaClient(  # pylint: disable=unexpected-keyword-arg
                    self.config["kafka_address"],
                    ssl=self.config.get("ssl", False),
                    certfile=self.config.get("certfile"),
                    keyfile=self.config.get("keyfile"),
                    ca=self.config.get("ca")
                )
                self.kafka_producer = SimpleProducer(self.kafka,
                                                     codec=CODEC_SNAPPY if snappy else CODEC_NONE)
                self.log.info("Initialized Kafka Client, address: %r", self.config["kafka_address"])
                break
            except KAFKA_CONN_ERRORS as ex:
                self.log.warning("Retriable error during Kafka initialization: %s: %s, sleeping",
                                 ex.__class__.__name__, ex)
                self.kafka = None
                self.kafka_producer = None
                time.sleep(5.0)

    def send_messages(self, message_batch):
        if not self.kafka:
            self._init_kafka()
        try:
            self.kafka_producer.send_messages(self.topic, *message_batch)
            return True
        except KAFKA_CONN_ERRORS as ex:
            self.log.info("Kafka retriable error during send: %s: %s, waiting",
                          ex.__class__.__name__, ex)
            time.sleep(0.5)
            self._init_kafka()
        except Exception as ex:  # pylint: disable=broad-except
            self.log.exception("Unexpected exception during send to kafka")
            self.stats.unexpected_exception(ex=ex, where="sender", tags={"app": "journalpump"})
            time.sleep(5.0)
            self._init_kafka()
def sendMsgToKafka(obj, msg):
    #msg=msg
    #obj=obj
    client = KafkaClient("c9t26359.itcs.hpecorp.net:9092")
    producer = SimpleProducer(client)
    producer.send_messages(obj, msg)
    #response=producer.send_messages(obj,msg)
    #print response
    client.close()
def favorite_list_producer(id, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.favorite_list(id, count)
    for text in text_list:
        kafka_producer.send_messages("twitter", text)
    kafka.close()
    return
class MessageService:
    def __init__(self, kafkaBroker, kafkaTopic):
        self.broker = kafkaBroker
        self.topic = kafkaTopic
        self.client = KafkaClient(self.broker)
        self.producer = SimpleProducer(self.client)

    def sendMessage(self, message):
        self.producer.send_messages(self.topic, message)
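# Illustrative usage of MessageService above; the broker address and topic name
# are assumed values, not taken from the original snippet.
svc = MessageService('localhost:9092', 'events')
svc.sendMessage(b'hello from MessageService')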
class kafka082_emitter(object):
    def __init__(self, config=None, logger=None):
        self.log = logger
        self.config = config

        # Hush up the kafka module's logger
        import logging

        if self.log == None:
            import logging
            self.log = logging.getLogger(__name__)
            self.log.setLevel(logging.DEBUG)
            self.log.addHandler(logging.StreamHandler())

        if not self.config['interval']:
            self.interval = 1
        else:
            self.interval = self.config['interval']

        if (not "codec" in self.config) or (self.config['codec'] == "none"):
            self.codec = 0x00
            self.log("debug", msg="No codec section found in config or codec value is 'none'. Falling back to NO compression for Kafka messages.")
        elif self.config['codec'] == "gzip":
            self.codec = 0x01
            self.log("debug", msg="'gzip' codec selected for Kafka messages.")
        elif self.config['codec'] == "speedy":
            self.codec = 0x02
            self.log("debug", msg="'speedy' codec selected for Kafka messages.")
        else:
            self.log("warn", msg="Unrecognized codec '{0}'. Falling back to NO compression for Kafka messages.".format(self.config['codec']))
            self.codec = 0x00

        self.brokers = self.config['brokers']
        self.topic = self.config['topic']

        # hush up the kafka logger
        logging.getLogger("kafka").setLevel(logging.INFO)

        self.client = KafkaClient(self.brokers)
        self.producer = SimpleProducer(self.client, codec=self.codec)

    def emit_stats(self, payload, global_iteration):
        # take into account custom interval, if present in config
        if global_iteration % self.interval:
            return
        try:
            self.producer.send_messages(self.topic, json.dumps(payload))
        except:
            import sys, traceback
            ei = sys.exc_info()
            traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr)
        else:
            self.log("debug", msg="Successfully sent batch to Kafka.")
def _load_data():
    """
    Sends 50 messages (1 .. 50) to samza-test-topic.
    """
    logger.info("Running test_samza_job")
    kafka = util.get_kafka_client()
    kafka.ensure_topic_exists(TEST_INPUT_TOPIC)
    producer = SimpleProducer(kafka, async=False,
                              req_acks=SimpleProducer.ACK_AFTER_CLUSTER_COMMIT,
                              ack_timeout=30000)
    for i in range(1, NUM_MESSAGES + 1):
        producer.send_messages(TEST_INPUT_TOPIC, str(i))
    kafka.close()