Example #1
    def __init__(self, *args, **kwargs):
        import kafka
        import zlib  # used below for decompress_fun
        super(KafkaRandomReader, self).__init__(*args, **kwargs)
        brokers = self.read_option('brokers')
        group = self.read_option('group')
        topic = self.read_option('topic')

        client = kafka.KafkaClient(map(bytes, brokers))

        # TODO: Remove these comments when next steps are decided.
        # If resume is set to true, then child should not load initial offsets
        # child_loads_initial_offsets = False if settings.get('RESUME') else True

        # self.consumer = kafka.MultiProcessConsumer(client, group, topic, num_procs=1,
        #                                             child_loads_initial_offsets=child_loads_initial_offsets,
        #                                             auto_commit=False)

        self.consumer = kafka.SimpleConsumer(client,
                                             group,
                                             topic,
                                             auto_commit=False)

        self.decompress_fun = zlib.decompress
        self.processor = self.create_processor()
        self.partitions = client.get_partition_ids_for_topic(topic)

        self.logger.info('KafkaRandomReader has been initiated. '
                         'Topic: {}. Group: {}'.format(
                             self.read_option('topic'),
                             self.read_option('group')))

        self.logger.info('Running random sampling')
        self._reservoir = self.fill_reservoir()
        self.logger.info('Random sampling completed, ready to process batches')
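Note: KafkaClient and SimpleConsumer, used above, were removed in kafka-python 2.0. A minimal sketch of the same consumer setup against the current API; the broker address, topic, and group names are placeholders:

import kafka

# Modern-API sketch (kafka-python >= 2.0)
consumer = kafka.KafkaConsumer(
    'my-topic',
    bootstrap_servers=['localhost:9092'],
    group_id='my-group',
    enable_auto_commit=False,  # mirrors auto_commit=False above
)
# replaces client.get_partition_ids_for_topic(topic)
partitions = consumer.partitions_for_topic('my-topic')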
Example #2
 def _get_server(self):
     if not self.kafka_server:
         self.kafka_server = kafka.KafkaClient("%s:%s" %
                                               (self.host, self.port))
         self.kafka_consumer = kafka.KafkaConsumer(
             self.topic,
             bootstrap_servers=["%s:%s" % (self.host, self.port)])
Example #3
 def __init__(self, kafkahost, kafkaport, kafkatopics):
     kafkaaddress = '{0}:{1}'.format(kafkahost, kafkaport)
     self.__kafkaclient = kafka.KafkaClient(kafkaaddress)
     self.__kafkaproducer = kafka.SimpleProducer(self.__kafkaclient,
                                                 async=False)  # legacy kwarg, later renamed async_send ('async' became reserved in Python 3.7)
     self.__kafkatopics = kafkatopics
     self.__counter = 0
Example #4
def kafka_send(kurl, temp_fpath, format, topic, queue=None):
    try:
        kafka_python_client = kafka_python.KafkaClient(kurl)
        kafka_python_client.ensure_topic_exists(topic)
        kafka = pykafka.KafkaClient(hosts=kurl)

        publish_topic_object = kafka.topics[topic]
        # the default partitioner is random_partitioner
        producer = publish_topic_object.get_producer()

        if format == 'csv':
            with open(temp_fpath, 'r') as fp:
                text = fp.read()
                producer.produce([text])
        elif format == 'graphite':
            with open(temp_fpath, 'r') as fp:
                for line in fp.readlines():
                    producer.produce([line])
        else:
            raise EmitterUnsupportedFormat('Unsupported format: %s' % format)

        queue and queue.put((True, None))
    except Exception as e:
        if queue:
            queue.put((False, e))
        else:
            raise
    finally:
        queue and queue.close()
Example #5
 def check_connection(self):
     try:
         client = kafka.KafkaClient(**self.connection_info)
     except Exception:
         return False
     else:
         client.close()
         return True
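A hedged modern equivalent of this connectivity probe, assuming kafka-python >= 2.0, where constructing an admin client attempts a bootstrap connection and raises if no broker answers:

import kafka

def check_connection_modern(bootstrap_servers):
    # bootstrap_servers: "host:port" string or list of them (placeholder)
    try:
        admin = kafka.admin.KafkaAdminClient(bootstrap_servers=bootstrap_servers)
    except Exception:
        return False
    else:
        admin.close()
        return True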
Example #6
def connect(broker):

    kafka_client = kafka.KafkaClient(broker)
    producer = kafka.SimpleProducer(kafka_client)

    #producer = kafka.KafkaProducer(bootstrap_servers=broker)

    return producer
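The commented-out line already points at the current API; a sketch of the same helper using it:

def connect_modern(broker):
    # broker: "host:port" string or list of them
    return kafka.KafkaProducer(bootstrap_servers=broker)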
Example #7
 def _get_server(self):
     if not self.kafka_server:
         self.kafka_server = kafka.KafkaClient(
             "%s:%s" % (self.host, self.port))
         # self.kafka_consumer = kafka.KafkaConsumer(self.topic, bootstrap_servers=["%s:%s" % (self.host, self.port)])
         self.kafka_consumer = kafka.KafkaConsumer(
             bootstrap_servers=["%s:%s" % (self.host, self.port)])
         self.kafka_consumer.assign([TopicPartition(self.topic, 0)])
         self.kafka_consumer.seek_to_end()
Example #8
    def testStart(self):
        self.thread.start()
        time.sleep(15)
        self.producer.stop()

        message = kafka.SimpleConsumer(kafka.KafkaClient(self.url), "group1",
                                       self.topic).get_message()
        assert message is not None
Example #9
 def _ensure_connection(self):
     if self.kafka_client:
         return
     try:
         self.kafka_client = kafka.KafkaClient(self.hostaddrs)
         self.producer = kafka.SimpleProducer(self.kafka_client)
     except KafkaError as e:
         LOG.exception(_LE("Kafka Connection is not available: %s"), e)
         self.kafka_client = None
Example #10
 def initialize(self):
     cluster = kafka.KafkaClient(leaders[0])  # `leaders` (a list of broker addresses) is defined elsewhere in the source
     self.consumer = kafka.SimpleConsumer(cluster,
                                          "default_group",
                                          "WikiTest",
                                          buffer_size=16384,
                                          max_buffer_size=(10 * 1024 *
                                                           1024))
     self.consumer.seek(0)
     self.counter = 0
Example #11
def _is_kafka_service_running():
    """Checks whether the Kafka service is running or not"""
    kafka_running = True
    try:
        broker = KAFKA_BROKER
        kafka.KafkaClient(broker)
    except KafkaError:
        # Kafka service is not running.
        kafka_running = False
    return kafka_running
Example #12
    def _ensure_connection(self):
        if self._producer:
            return

        try:
            client = kafka.KafkaClient("%s:%s" % (self._host, self._port))
            self._producer = kafka.SimpleProducer(client)
        except Exception as e:
            LOG.exception(_LE("Failed to connect to Kafka service: %s"), e)
            raise messaging.DeliveryFailure('Kafka Client is not available, '
                                            'please restart Kafka client')
Example #13
def ensure_topics_exist():
    all_topics = ["reddit", "twitter", "google-news", "hacker-news"]

    admin = kafka.admin.KafkaAdminClient(bootstrap_servers=[KAFKA_URL])
    client = kafka.KafkaClient([KAFKA_URL])
    existing_topics = client.topics
    topics = [
        kafka.admin.NewTopic(topic, 1, 1) for topic in all_topics
        if topic not in existing_topics
    ]
    admin.create_topics(topics)
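This example mixes the modern admin client with a legacy client.topics lookup. A sketch that stays entirely on the current API, using KafkaConsumer.topics() to list the existing topic names (KAFKA_URL is the same placeholder as above):

import kafka

def ensure_topics_exist_modern(all_topics):
    admin = kafka.admin.KafkaAdminClient(bootstrap_servers=[KAFKA_URL])
    existing_topics = kafka.KafkaConsumer(bootstrap_servers=[KAFKA_URL]).topics()
    topics = [kafka.admin.NewTopic(topic, 1, 1) for topic in all_topics
              if topic not in existing_topics]
    if topics:  # create_topics() expects a non-empty list of NewTopic objects
        admin.create_topics(topics)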
Example #14
    def create_topic(self, topic):

        '''

        Create topic

        '''

        client = kafka.KafkaClient(hosts=self.server + ':9092')
        res = client.ensure_topic_exists(topic)
        return res
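ensure_topic_exists() exists only on the legacy client and is gone in kafka-python 2.0. A hedged sketch of the same create_topic against the current admin API, tolerating a topic that already exists:

def create_topic_modern(server, topic):
    from kafka.admin import KafkaAdminClient, NewTopic
    from kafka.errors import TopicAlreadyExistsError
    admin = KafkaAdminClient(bootstrap_servers=server + ':9092')
    try:
        admin.create_topics([NewTopic(topic, 1, 1)])
    except TopicAlreadyExistsError:
        pass  # already there, which is all ensure_topic_exists() guaranteed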
Example #15
    def __init__(self, num_partitions, topic, *args, **kwargs):

        self.con = kafka.KafkaClient(KAFKAHOST)
        self.client = kafka.SimpleProducer(self.con, async=False)
        self.topic = topic
        print "Set topic to %s" % self.topic
        try:
            self.bulksize = kwargs["bulksize"]
        except:
            self.bulksize = 1
        self.msg_bulk = []
Example #16
 def initialize(self):
     cluster = kafka.KafkaClient(leaders[0])  # `leaders` (a list of broker addresses) is defined elsewhere in the source
     self.consumer = kafka.SimpleConsumer(cluster,
                                          "default_group",
                                          "WikiTest",
                                          buffer_size=8192,
                                          max_buffer_size=(10 * 1024 *
                                                           1024))
     self.consumer.seek(0)
     self.counter = 0
     log.debug("Starting Kafka Consumer")
Example #17
def create_topic(topic):
    '''

    Create topic

    '''
    import kafka

    client = kafka.KafkaClient(hosts=config.get('kafka_server') + ':' +
                               config.get('kafka_port'))
    res = client.ensure_topic_exists(topic)
    return res
Example #18
    def _publish_to_kafka_no_retries(self, url):

        if kafka_python is None or pykafka is None:
            raise ImportError('Please install kafka and pykafka')

        try:
            parts = url[len('kafka://'):].split('/')

            if len(parts) == 2:
                kurl = parts[0]
                topic = parts[1]
            else:
                raise Exception(
                    'The kafka url provided does not seem to be valid: %s. '
                    'It should be something like this: '
                    'kafka://[ip|hostname]:[port]/[kafka_topic]. '
                    'For example: kafka://1.1.1.1:1234/metrics' % url)

            h = NullHandler()
            logging.getLogger('kafka').addHandler(h)

            # XXX We should definitely create a long-lasting kafka client
            kafka_python_client = kafka_python.KafkaClient(kurl)
            kafka_python_client.ensure_topic_exists(topic)

            kafka = pykafka.KafkaClient(hosts=kurl)
            publish_topic_object = kafka.topics[topic]
            # the default partitioner is random_partitioner
            producer = publish_topic_object.get_producer()

            if self.format == 'csv':
                with open(self.temp_fpath, 'r') as fp:
                    text = fp.read()
                    logger.debug(producer.produce([text]))

            elif self.format == 'graphite':

                with open(self.temp_fpath, 'r') as fp:
                    for line in fp.readlines():
                        producer.produce([line])
            else:
                logger.debug('Could not send data because {0} is an unknown '
                             'format'.format(self.format))
                raise EmitterUnsupportedFormat(
                    'Unsupported format: %s' % self.format)

            kafka_python_client.close()
        except Exception as e:

            # kafka.close()

            logger.debug('Could not send data to {0}: {1}'.format(url, e))
            raise
Example #19
def main():

    # Kafka information
    cluster = kafka.KafkaClient("localhost:9092")
    prod = kafka.SimpleProducer(cluster, async=False)
    topic = "my-topic"

    # Node information.  Currently hardcoded
    # TODO user should be able to specify a graph as an input to the problem
    nodeCount = 200

    # Parameter generation information
    generationType = "sine"
    avgRate = 100000  # per second average
    sinePeriod = 20  # seconds
    sinePerturbation = 0.5  # scale factor

    # debugging
    count = 0

    to = time.time()
    print(to)
    if generationType == "sine":
        periodLocation = 0
        nextParam = nextTimeSineDemand(avgRate, sinePerturbation, sinePeriod,
                                       to)
        nT = nextTime(nextParam)
    else:
        nT = nextTime(1)
    while True:
        t = time.time()
        if t - to >= nT:
            to = t
            count = count + 1

            if count % 1000 == 0:
                print("mark")
            if generationType == "sine":
                nT = nextTime(nextParam)
                nextParam = nextTimeSineDemand(avgRate, sinePerturbation,
                                               sinePeriod, t)
                # print(nextParam)
            else:
                nT = nextTime(10000)
            n = random.randint(0, nodeCount - 1)
            prod.send_messages(topic,
                               *[str(n) + ' ' + str(int(round((to * 1000))))])
Example #20
    def _ensure_topics_exist(self):
        all_topics = ["reddit", "twitter", "google-news", "hacker-news"]

        try:
            admin = kafka.admin.KafkaAdminClient(
                bootstrap_servers=[self.KAFKA_URL])
            client = kafka.KafkaClient([self.KAFKA_URL])
            existing_topics = client.topics
            topics = [
                kafka.admin.NewTopic(topic, 1, 1) for topic in all_topics
                if topic not in existing_topics
            ]
            admin.create_topics(topics)
        except Exception as e:
            raise SinkNotAvailableError from e
Example #21
def get_kafka_client(config):
    for i in range(3):
        try:
            return kafka.KafkaClient(config[c.kafka_brokers])
        except kafka.common.KafkaUnavailableError as e:
            last_exception = e
            logger.error(
                "Failed to create kafka client, this is usually due "
                "to all of the brokers died or invalid broker "
                "IP/ports, error=%s", traceback.format_exc())
            time.sleep(i + 1)
        except Exception as e:
            last_exception = e
            logger.error("Failed to create kafka client, error=%s",
                         traceback.format_exc())
            time.sleep(i + 1)
    raise last_exception
Example #22
def run_kafka(args):
    client = kafka.KafkaClient(args.server)
    producer = kafka.SimpleProducer(client)

    # invalid message
    producer.send_messages(str('test'), json.dumps({}))

    # 3x valid messages
    for _ in xrange(3):
        producer.send_messages(str('test'), json.dumps(bs))

    hub = balog.consumers.ConsumerHub()
    hub.scan(sample)
    engine = balog.engines.KafkaEngine(hub=hub,
                                       kafka_server=args.server,
                                       group=args.group,
                                       topic=args.topic)
    engine.run()
Example #23
    def send_kafka(self, message):
        kafka_client = kafka.KafkaClient(self.config['kafka_addr'])
        server_topics = kafka_client.topic_partitions

        try:
            if self.config['topic_name'] not in server_topics:
                self.logger.info('no topic')
                admin_client = KafkaAdminClient(bootstrap_servers=self.config['kafka_addr'])
                # create_topics() expects a list of NewTopic objects
                admin_client.create_topics(
                    [kafka.admin.NewTopic(self.config['topic_name'], 1, 1)])
                self.logger.info('topic created')
        except Exception as e:
            self.logger.info('topic create error: ' + str(e))

        producer = KafkaProducer(bootstrap_servers=self.config['kafka_addr'], value_serializer=lambda v: json.dumps(v).encode('utf-8'))
        producer.send(self.config['topic_name'], message)
        producer.flush()
        # self.logger.info('message send')

        return 
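KafkaProducer.send() is asynchronous and returns a future; when delivery confirmation matters, one can block on the future instead of relying on flush() alone (sketch, reusing the config keys above):

future = producer.send(self.config['topic_name'], message)
record_metadata = future.get(timeout=10)  # raises KafkaError if delivery failed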
Example #24
import collections

import aiohttp
import kafka
from order_book import OrderBook
from order_book import kafka_send
from unicorn_binance_websocket_api.unicorn_binance_websocket_api_manager import BinanceWebSocketApiManager


class BinanceOrderBook(OrderBook):
    def __init__(self, lastUpdateId=0):
        self.lastUpdateId = lastUpdateId
        super().__init__()


host = 'localhost:9092'
producer = kafka.KafkaProducer(bootstrap_servers=host)
kafka.KafkaClient(bootstrap_servers=host).add_topic('all')

exchange = 'binance.com'
manager = BinanceWebSocketApiManager(exchange=exchange)

with open('./trading_pairs/binance.pair', 'r') as f:
    pairs = [e.replace('\n', '') for e in f.readlines()]

local_book = collections.defaultdict(BinanceOrderBook)


# retrieve orderbook snapshot
async def get_snapshot(pair, session):
    async with session.get(
            f'https://www.binance.com/api/v1/depth?symbol={pair}&limit=100'
    ) as r:
Example #25
import sys
import socket
import argparse
import io
from datetime import datetime
from collections import Counter

import kafka
import avro.schema
from avro.datafile import DataFileWriter
from avro.io import DatumWriter

kafka_endpoint = "ip-172-31-23-112:9092"
topics = ["test02"]
consumer_group = "test_kafka_consumer"
kafka_client = kafka.KafkaClient(kafka_endpoint)

topic = topics[0]
consumer = kafka.SimpleConsumer(kafka_client, consumer_group, topic)

# reeeeeewiiiiiiind
#consumer.seek(0, 0)


def dump_message(message):
    print "****"
    print(message)
    print "Message length: %s" % (len(message))
    print "* Offset *"
    print message[0]
    # get the value back out of the kafka consumer's fetched message
Example #26
kafkaIPandPort = os.environ.get("kafkaIPandPort")
intrinio_forex_key = os.environ.get("intrinio_forex_key")

#send each quote to kafka topic
def on_quote(quote, backlog):
    print("QUOTE: " , quote, "BACKLOG LENGTH: ", backlog)
    strQuote = json.dumps(quote)
    byteQuote = strQuote.encode('utf-8')

    #sending keyed messages are in this format
    #producer.send_messages(b'my-topic', b'key1', b'some message')
    prod.send_messages(topic, quote["code"].encode("utf-8"), byteQuote)

#kafka connection 
cluster = kafka.KafkaClient(kafkaIPandPort)
prod = KeyedProducer(cluster)
topic = "forex_topic"

#intrinio connection 
options = {
    'api_key': intrinio_forex_key,
    'provider': 'fxcm',
    'on_quote': on_quote
}
client = IntrinioRealtimeClient(options)
client.join([
    'fxcm:pair:EUR/USD', 'fxcm:pair:USD/JPY', 'fxcm:pair:GBP/USD',
    'fxcm:pair:USD/CHF', 'fxcm:pair:EUR/CHF', 'fxcm:pair:AUD/USD',
    'fxcm:pair:USD/CAD', 'fxcm:pair:NZD/USD', 'fxcm:pair:EUR/GBP',
    'fxcm:pair:EUR/JPY', 'fxcm:pair:GBP/JPY', 'fxcm:pair:CHF/JPY',
    'fxcm:pair:GBP/CHF', 'fxcm:pair:EUR/AUD', 'fxcm:pair:EUR/CAD',
    'fxcm:pair:AUD/CAD', 'fxcm:pair:AUD/JPY', 'fxcm:pair:CAD/JPY',
    'fxcm:pair:NZD/JPY', 'fxcm:pair:GBP/CAD', 'fxcm:pair:GBP/NZD',
    'fxcm:pair:GBP/AUD', 'fxcm:pair:AUD/NZD', 'fxcm:pair:USD/SEK',
    'fxcm:pair:EUR/SEK', 'fxcm:pair:EUR/NOK', 'fxcm:pair:USD/NOK',
    'fxcm:pair:USD/MXN', 'fxcm:pair:AUD/CHF', 'fxcm:pair:EUR/NZD',
    'fxcm:pair:USD/ZAR', 'fxcm:pair:ZAR/JPY', 'fxcm:pair:USD/TRY',
    'fxcm:pair:EUR/TRY', 'fxcm:pair:NZD/CHF', 'fxcm:pair:CAD/CHF',
    'fxcm:pair:NZD/CAD', 'fxcm:pair:TRY/JPY'
])
client.connect()
client.keep_alive()

Example #27
 def _get_client(self):
     if not self.kafka_client:
         self.kafka_client = kafka.KafkaClient(
             "%s:%s" % (self.host, self.port))
         self.kafka_producer = kafka.SimpleProducer(self.kafka_client)
Example #28
 def __init__(self, hosts):
     """
     """
     self.hosts = hosts
     self.client = kafka.KafkaClient(hosts=self.hosts)
Example #29
class StdOutListener(StreamListener):
    def on_data(self, data):
        producer.send_messages(topic, data.encode('utf-8'))
        print("Tweet Sent")
        return True

    def on_error(self, status):
        print(status)


## Set access keys from Twitter Developer app info
with open('/home/n/opt/MindBender_BD/Misc/keys') as keys:
    twitter_keys = json.load(keys)
    consumer_key = twitter_keys["twitter"]["consumer_key"]
    consumer_secret = twitter_keys["twitter"]["consumer_secret"]
    access_token = twitter_keys["twitter"]["access_token"]
    access_secret = twitter_keys["twitter"]["access_secret"]

## Set authorizations
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tw.API(auth, wait_on_rate_limit=True)

## Kafka stream listener (topic created in terminal)
topic = "tweets"
kafka = kf.KafkaClient("localhost:9099")
producer = kf.SimpleProducer(kafka)
listen = StdOutListener()
stream = tw.Stream(auth, listen)
stream.filter(track="bigdata")
Example #30
words_set = words_all.split(" ")

words_dict = dict()

for x in words_set:
    ## Filter out blanks, single letters and simple sentence constructors (e.g. "a", "it", "I", "and", etc.)
    if len(x) > 3:
        words_dict[x] = words_all.count(x)

with open(keywords, "a") as f:
    for word in sorted(words_dict, key=words_dict.get, reverse=True)[:10]:
        f.write(word+"\n")

## Send via Kafka broker
kafka = kf.KafkaClient("localhost:9099,localhost:9092,localhost:9093")
producer = kf.SimpleProducer(kafka)

## Send top 10 words to kafka consumer in JSON, formatted {date, word, freq}
for word in sorted(words_dict, key=words_dict.get, reverse=True)[:10]:
    json = '{"date": "'+timestr+'", "word":"'+word+'", "frequency": "'+str(words_dict[word])+'"}'
    producer.send_messages("reddit", bytes(json, 'utf-8'))





###### Additional potential PRAW usage ######
## Grab a submission (post) from reddit by its ID (from the URL or submission.id).
#submission = reddit.submission(id="k3u81d")