def __init__(self):
    # localhost:9092 is the default Kafka broker host and port (not Zookeeper)
    self.client = pykafka.KafkaClient("localhost:9092")
    # Get a producer for the "twitter" topic
    self.producer = self.client.topics[bytes("twitter", "ascii")].get_producer()
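A minimal usage sketch for the producer built above; the enclosing class name TweetProducer is an assumption for illustration, and pykafka producers take bytes payloads:

streamer = TweetProducer()  # hypothetical name for the class defining __init__ above
streamer.producer.produce(bytes("hello twitter stream", "ascii"))  # payload must be bytes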
def main():
    es = Elasticsearch(
        "https://db2cb7cbe8834bb1a48f960a437f461d.us-east-1.aws.found.io:9243",
        http_auth=(os.environ["ELASTIC_USERNAME"], os.environ["ELASTIC_PASSWORD"]),
    )
    client = pykafka.KafkaClient("localhost:9092")
    topic = client.topics[bytes("TweetStreamSentiments", "ascii")]
    consumer = topic.get_simple_consumer(
        consumer_group="mygroup",
        auto_offset_reset=OffsetType.LATEST,
        reset_offset_on_start=True,
    )
    for msg in consumer:
        try:
            timestamp = datetime.strptime(
                msg.partition_key.decode().split(",")[1][1:-1],
                "%Y-%m-%d %H:%M:%S",
            )
            # Elasticsearch only recognizes timestamps in this format
            eventTime = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
            json_send_data = {
                "timestamp": eventTime,
                "sentiment_score": float(msg.value.decode()),
            }
            print(json_send_data)
            es.index(
                index="tweets_sentiment_ts",
                id=msg.partition_key.decode(),
                body=json_send_data,
            )
        except Exception as e:
            print("Exception: " + str(e))
def __init__(self):
    print("Publish data to topic: " + topic)
    self.client = pykafka.KafkaClient("localhost:9092")
    # Get a producer for the given topic
    self.producer = self.client.topics[bytes(topic, "ascii")].get_producer()
def testCrawlContainerKafka(self):
    env = os.environ.copy()
    mypath = os.path.dirname(os.path.realpath(__file__))
    os.makedirs(self.tempd + '/out')
    # crawler itself needs to be root
    process = subprocess.Popen(
        [
            '/usr/bin/python', mypath + '/../../crawler/crawler.py',
            '--url', 'kafka://localhost:9092/test',
            '--features', 'os,process',
            '--crawlContainers', self.container['Id'],
            '--crawlmode', 'OUTCONTAINER',
            '--numprocesses', '1'
        ],
        env=env)
    stdout, stderr = process.communicate()
    assert process.returncode == 0
    print(stderr)
    print(stdout)
    kafka = pykafka.KafkaClient(hosts='localhost:9092')
    topic = kafka.topics['test']
    consumer = topic.get_simple_consumer()
    message = consumer.consume()
    assert '"cmd":"/bin/sleep 60"' in message.value
def __init__(self, topic):
    """Called when initialized.

    This will set up a Kafka client and a producer.
    """
    self.client = pykafka.KafkaClient('localhost:9092')
    self.producer = self.client.topics[topic].get_producer()
def connect(self):
    self.kafka_client = pykafka.KafkaClient(
        hosts=self.hosts,
        socket_timeout_ms=500,
        offsets_channel_socket_timeout_ms=10 * 500)
    self.kafka_topic = self.kafka_client.topics[self.topic]
    self.connected = True
def kafka_send(kurl, temp_fpath, format, topic, queue=None):
    try:
        kafka_python_client = kafka_python.KafkaClient(kurl)
        kafka_python_client.ensure_topic_exists(topic)
        kafka = pykafka.KafkaClient(hosts=kurl)
        publish_topic_object = kafka.topics[topic]
        # the default partitioner is random_partitioner
        producer = publish_topic_object.get_producer()
        if format == 'csv':
            with open(temp_fpath, 'r') as fp:
                text = fp.read()
                producer.produce([text])
        elif format == 'graphite':
            with open(temp_fpath, 'r') as fp:
                for line in fp.readlines():
                    producer.produce([line])
        else:
            raise EmitterUnsupportedFormat('Unsupported format: %s' % format)
        queue and queue.put((True, None))
    except Exception as e:
        if queue:
            queue.put((False, e))
        else:
            raise
    finally:
        queue and queue.close()
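Since kafka_send reports its outcome through the optional queue, it can be driven from a worker process; a hedged sketch, with illustrative host, file path, and topic values:

import multiprocessing

q = multiprocessing.Queue()
worker = multiprocessing.Process(
    target=kafka_send,
    args=('localhost:9092', '/tmp/frame.csv', 'csv', 'metrics', q))
worker.start()
worker.join()
ok, err = q.get()  # (True, None) on success, (False, exception) on failure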
def main():
    parser = _get_arg_parser()
    args = parser.parse_args()
    if args.command:
        client = pykafka.KafkaClient(hosts=args.host)
        args.func(client, args)
    else:
        parser.print_help()
def kafka_connect():
    try:
        client = pykafka.KafkaClient(hosts=settings.KAFKA_HOSTS)
    except pykafka.exceptions.NoBrokersAvailableError:
        log.info('Retrying Kafka connection. . .')
        time.sleep(3)
        return kafka_connect()
    return client
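Each failed attempt above adds a stack frame, so a long broker outage can eventually hit Python's recursion limit. An equivalent iterative sketch, assuming the same settings and log objects:

def kafka_connect_iterative():
    # Same retry behavior as kafka_connect, but with a flat loop
    while True:
        try:
            return pykafka.KafkaClient(hosts=settings.KAFKA_HOSTS)
        except pykafka.exceptions.NoBrokersAvailableError:
            log.info('Retrying Kafka connection. . .')
            time.sleep(3)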
def __init__(self):
    """
    Instantiate a PyKafka client and a producer subscribed to the
    designated Kafka topic (as defined in config.py).
    """
    self.client = pykafka.KafkaClient(config.bootstrap_servers)
    self.producer = self.client.topics[bytes(
        config.twitter_kafka_topic_name, config.data_encoding)].get_producer()
def __init__(self, host, port):
    '''Initialize a new KafkaClient using the given host and port.

    @param host: host name
    @param port: server port
    '''
    self.Host = host
    self.Port = port
    self.Client = pykafka.KafkaClient("%s:%s" % (self.Host, self.Port))
    self.RunningThread = None
def __init__(self):
    '''Create a pykafka client using the bootstrap servers defined in
    config.py, and a Kafka producer bound to the twitter_kafka_topic_name
    topic, also defined in config.py.'''
    self.client = pykafka.KafkaClient(config.bootstrap_servers)
    self.producer = self.client.topics[bytes(
        config.twitter_kafka_topic_name, config.data_encoding)].get_producer()
def do2():
    client = pykafka.KafkaClient(
        hosts='10.250.100.19:9092,10.250.100.20:9092')
    topic = client.topics['flume_test']
    with topic.get_producer() as producer:
        i = 0
        while True:
            print(i)
            producer.produce('simple producer %s' % i)
            i += 1
def __init__(self, profile: dict):
    sapsucker_log("Building KafkaHandler")
    self.profile = profile
    if 'broker_list' in self.profile['kafka']:
        broker_list = self.profile['kafka']['broker_list']
        hosts = ",".join(broker_list)
        sapsucker_log(hosts)
        self.client = pykafka.KafkaClient(hosts=hosts)
        self.producers = {}
    else:
        raise Exception("No broker_list branch under the kafka configuration.")
def __init__(self, hosts, topic, broker_version="0.8.2"):
    """Construct a Producer."""
    self.topic = topic
    self.client = pykafka.KafkaClient(hosts=','.join(hosts),
                                      socket_timeout_ms=10000,
                                      broker_version=broker_version)
    self.producer_topic = self.client.topics[self.topic]
    self.producer = self.producer_topic.get_producer(max_retries=3,
                                                     linger_ms=3000,
                                                     retry_backoff_ms=1000,
                                                     use_rdkafka=True)
def __init__(self):
    self.client = pykafka.KafkaClient("localhost:9092")
    self.kafka_topic = "TweetStreamListener"
    self.producer = self.client.topics[bytes(self.kafka_topic, "ascii")].get_producer()
    self.es = Elasticsearch(
        "https://db2cb7cbe8834bb1a48f960a437f461d.us-east-1.aws.found.io:9243",
        http_auth=(os.environ["ELASTIC_USERNAME"], os.environ["ELASTIC_PASSWORD"]),
    )
    print("Connecting...")
def _get_kafka_client(self):
    if self.config.ssl_keyfile:
        ssl_config = pykafka.connection.SslConfig(
            self.config.ssl_cafile,
            certfile=self.config.ssl_certfile,
            keyfile=self.config.ssl_keyfile,
        )
    else:
        ssl_config = None
    return pykafka.KafkaClient(hosts=self.config.broker, ssl_config=ssl_config)
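A standalone sketch of the same TLS setup, with illustrative certificate paths and broker address (the CA bundle is SslConfig's first positional argument):

import pykafka
import pykafka.connection

ssl_config = pykafka.connection.SslConfig(
    "/etc/ssl/certs/ca.pem",                # illustrative CA bundle path
    certfile="/etc/ssl/certs/client.pem",   # illustrative client certificate
    keyfile="/etc/ssl/private/client.key",  # illustrative client key
)
client = pykafka.KafkaClient(hosts="broker.example.com:9093", ssl_config=ssl_config)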
def _publish_to_kafka_no_retries(self, url):
    if kafka_python is None or pykafka is None:
        raise ImportError('Please install kafka and pykafka')
    try:
        # avoid shadowing the builtin `list`
        parts = url[len('kafka://'):].split('/')
        if len(parts) == 2:
            kurl = parts[0]
            topic = parts[1]
        else:
            raise Exception(
                'The kafka url provided does not seem to be valid: %s. '
                'It should be something like this: '
                'kafka://[ip|hostname]:[port]/[kafka_topic]. '
                'For example: kafka://1.1.1.1:1234/metrics' % url)
        h = NullHandler()
        logging.getLogger('kafka').addHandler(h)
        # XXX We should definitely create a long lasting kafka client
        kafka_python_client = kafka_python.KafkaClient(kurl)
        kafka_python_client.ensure_topic_exists(topic)
        kafka = pykafka.KafkaClient(hosts=kurl)
        publish_topic_object = kafka.topics[topic]
        # the default partitioner is random_partitioner
        producer = publish_topic_object.get_producer()
        if self.format == 'csv':
            with open(self.temp_fpath, 'r') as fp:
                text = fp.read()
                logger.debug(producer.produce([text]))
        elif self.format == 'graphite':
            with open(self.temp_fpath, 'r') as fp:
                for line in fp.readlines():
                    producer.produce([line])
        else:
            logger.debug('Could not send data because {0} is an unknown '
                         'format'.format(self.format))
            # a bare `raise` outside an except block is invalid; raise explicitly
            raise ValueError('Unknown format: {0}'.format(self.format))
        kafka_python_client.close()
    except Exception as e:
        # kafka.close()
        logger.debug('Could not send data to {0}: {1}'.format(url, e))
        raise
def make_kafka_consumer(hosts, env, topic_suffix, group):
    topic_name = "fatcat-{}.{}".format(env, topic_suffix).encode('utf-8')
    client = pykafka.KafkaClient(hosts=hosts, broker_version="1.0.0")
    consume_topic = client.topics[topic_name]
    print("Consuming from kafka topic {}, group {}".format(topic_name, group))
    consumer = consume_topic.get_balanced_consumer(
        consumer_group=group.encode('utf-8'),
        managed=True,
        auto_commit_enable=True,
        auto_commit_interval_ms=30000,  # 30 seconds
        compacted_topic=True,
    )
    return consumer
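A sketch of draining the returned consumer; the argument values are illustrative, and with auto_commit_enable=True offsets are committed in the background:

consumer = make_kafka_consumer(
    hosts="localhost:9092", env="prod", topic_suffix="changelog", group="mygroup")
for msg in consumer:
    if msg is not None:
        print(msg.offset, msg.value)  # msg.value is raw bytes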
def main():
    client = pykafka.KafkaClient(
        hosts='10.250.100.19:9092,10.250.100.20:9092')
    topic = client.topics['flume_test']
    '''producer = topic.get_sync_producer()
    n = 0
    while True:
        producer.produce('simple producer %s' % n)
        n += 1
    '''
    with topic.get_sync_producer() as producer:
        for i in range(4):
            producer.produce('test message ' + str(i**2))
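get_sync_producer blocks on every produce() call until the broker acknowledges delivery, which is simple but slow. For throughput, pykafka's asynchronous producer with delivery reports is the usual alternative; a hedged sketch against the same topic:

# Async producer: produce() returns immediately; delivery reports carry errors
with topic.get_producer(delivery_reports=True) as producer:
    producer.produce(b'test message')
    msg, exc = producer.get_delivery_report(block=True)
    if exc is not None:
        print('delivery failed: %r' % exc)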
def run(self):
    # 1. start consumer (in managed/balanced fashion, with consumer group)
    # 2. for each thingie, do the work; if success publish to kafka; either
    #    way... print? log?
    # 3. repeat!
    print("Starting grobid-hbase-worker...")
    try:
        host = self.hbase_host
        hb_conn = happybase.Connection(host=host, transport="framed",
                                       protocol="compact")
    except Exception:
        raise Exception("Couldn't connect to HBase using host: {}".format(host))
    self.hb_table = hb_conn.table(self.hbase_table_name)
    print("HBase inserting into {}".format(self.hbase_table_name))
    kafka = pykafka.KafkaClient(hosts=self.kafka_hosts, broker_version="2.0.0")
    consume_topic = kafka.topics[self.consume_topic]
    sequential_failures = 0
    consumer = consume_topic.get_balanced_consumer(
        consumer_group=self.consumer_group,
        managed=True,
        auto_commit_enable=True,
        # needed to avoid MessageSet decode errors
        fetch_message_max_bytes=4*1024*1024,
        # LATEST because it's better to miss processing than to waste time re-processing
        auto_offset_reset=pykafka.common.OffsetType.LATEST,
        compacted_topic=True)
    print("Kafka consuming {} in group {}".format(
        self.consume_topic, self.consumer_group))
    for msg in consumer:
        #print("got a line! ")
        grobid_output, status = self.do_work(msg.value.decode('utf-8'))
        if grobid_output:
            sequential_failures = 0
        else:
            sys.stderr.write("Failed to process GROBID extraction output: {}\n".format(status))
            sequential_failures += 1
            if sequential_failures > 20:
                sys.stderr.write("too many failures in a row, bailing out\n")
                sys.exit(-1)
def testCrawlContainerKafka2(self):
    emitters = EmittersManager(urls=['kafka://localhost:9092/test'])
    crawler = ContainersCrawler(
        features=['os', 'process'],
        user_list=self.container['Id'])
    worker = Worker(emitters=emitters, frequency=-1, crawler=crawler)
    worker.iterate()
    kafka = pykafka.KafkaClient(hosts='localhost:9092')
    topic = kafka.topics['test']
    consumer = topic.get_simple_consumer()
    message = consumer.consume()
    assert '"cmd":"/bin/sleep 60"' in message.value
    for i in range(1, 5):
        worker.iterate()
        message = consumer.consume()
        assert '"cmd":"/bin/sleep 60"' in message.value
def request_scheduled(requestScheduled):
    logger.info("Start of scheduled request")
    client = pykafka.KafkaClient(
        hosts='{}:{}'.format(kafka_server, kafka_port))
    topic = client.topics['{}'.format(kafka_topic)]
    producer = topic.get_sync_producer()
    msg = {
        "type": "requestScheduled",
        "datetime": datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
        "payload": requestScheduled
    }
    msg_str = json.dumps(msg)
    producer.produce(msg_str.encode('utf-8'))
    logger.info('Produced message in topic {}: {}'.format(topic, msg))
def __init__(self, topic, group_id, **args):
    super(Consumer, self).__init__('kafka', **args)
    if not self.closed:
        if 'host' not in self.__dict__ or not self.host:
            raise TypeError(
                'KAFKA: the host has not been set in config file or parameters.')
        if 'zookeeper' not in self.__dict__ or not self.zookeeper:
            raise TypeError(
                'KAFKA: the zookeeper has not been set in config file or parameters.')
        self.__client = pykafka.KafkaClient(hosts=self.host,
                                            zookeeper_hosts=self.zookeeper)
        self.__consumer = self.__client.topics[topic.encode()].get_balanced_consumer(
            consumer_group=group_id.encode(),
            auto_commit_enable=True,
            fetch_message_max_bytes=67108864)
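A hedged sketch of a consume method this class might expose; the method and handler names are hypothetical, and the double-underscore attribute is only reachable from inside the class:

def consume(self, handler):
    """Hypothetical helper: pass each message's bytes payload to handler."""
    for message in self.__consumer:
        if message is not None:
            handler(message.value)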
def produce_weird_messages(topic, partition, bootstrap_servers):
    producer = Producer({'bootstrap.servers': bootstrap_servers})
    for msg in weird_messages:
        if 'headers' in msg:
            producer.produce(topic, msg['value'], key=msg['key'],
                             headers=msg['headers'], partition=partition)
        else:
            producer.produce(topic, msg['value'], key=msg['key'],
                             partition=partition)
    producer.flush()
    # pykafka for weird timestamps
    pykafkaClient = pykafka.KafkaClient(hosts=bootstrap_servers)
    pykafkaTopic = pykafkaClient.topics[topic]
    with pykafkaTopic.get_sync_producer() as pykafkaProducer:
        pykafkaProducer.produce(b"noneTimestamp")
def mainpro(serverip, port):
    global hosts
    hosts = "%s:%s" % (serverip, port)
    global client
    client = pykafka.KafkaClient(hosts=hosts)
    global spclient
    spclient = kafka.SimpleClient(hosts=hosts)
    global admin
    admin = kafka.admin.client.KafkaAdminClient(bootstrap_servers=hosts)
    zkserver = getzkserver()
    global myzk
    myzk = zkconn(zkserver)
    global zkdir
    zkdir = "/"
    myoptions = options()
    topOptions = myoptions.keys()
    while True:
        if is_sigint_up:
            os._exit(0)
        try:
            comm = raw_input("kafka %s>" % (port))
            commlist = comm.split()
            if len(commlist) <= 0:
                continue
            if commlist[0] in topOptions:
                if commlist[0] == "help":
                    print("[ help info ]")
                    print(toJson(myoptions))
                else:
                    status = eval(commlist[0])(commlist)
                    if status == False:
                        print("[ %s help info ]" % (commlist[0]))
                        print(toJson(options(commlist[0])))
            else:
                print("Command error!")
        except:
            print("Execute error!")
            if debug:
                msg = traceback.format_exc()
                print(msg)
                os._exit(0)
def send_tweets_to_spark(http_resp, tcp_connection):
    for line in http_resp.iter_lines():
        try:
            full_tweet = json.loads(line)
            tweet_text = full_tweet['text']
            print("Tweet Text: " + tweet_text)
            print("------------------------------------------")
            # tcp_connection.send(bytes("{}\n".format(tweet_text), "utf-8"))
            # tcp_connection.send(tweet_text)
            client = pykafka.KafkaClient("localhost:9092")
            print("Client finished")
            producer = client.topics[bytes("twitter", "ascii")].get_producer()
            producer.produce(bytes(tweet_text, "ascii"))
            print("Producer finished")
        except:
            e = sys.exc_info()[0]
            print("Error: %s" % e)
def connect_to_broker(self, broker, topic):
    kafka_python_client = kafka_python.SimpleClient(broker)
    kafka_python_client.ensure_topic_exists(topic)
    self.client = pykafka.KafkaClient(hosts=broker)
    self.producer = self.client.topics[topic].get_producer()
def __init__(self):
    self.client = pykafka.KafkaClient("localhost:9092")
    self.producer = self.client.topics[bytes('twitterstream_nl', 'ascii')].get_producer()
    def on_data(self, data):
        # NOTE: the original snippet begins mid-method; the method header,
        # try statement, and json parsing below are reconstructed assumptions
        try:
            message = json.loads(data)
            self.kafkaproducer.produce(bytes(json.dumps(message), "utf-8"))
        except BaseException as error:
            print(str(error))
        return True

    def on_error(self, status):
        print(status)
        return True


if __name__ == "__main__":
    topic = "iot_topic"
    kafka_client = pykafka.KafkaClient("localhost:9092")
    kafka_producer = kafka_client.topics[bytes(topic, "utf-8")].get_producer()
    l = StdOutListener(kafka_producer)
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)
    stream.filter(track=['iot'], languages=["en"])