Example #1
class Tail2kafka(object):

    def __init__(self, host, port, topic, logfile):
        self.host = host
        self.port = port
        self.topic = topic
        self.logfile = logfile
        self.create_kafka_producer()

    def create_kafka_producer(self):
        kafka = KafkaClient(self.host + ":" + self.port)
        self.producer = SimpleProducer(kafka)

    def log_lines_generator(self):
        cmd = ['tail', '-n', '0', '-F', self.logfile]
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=None)
        while True:
            line = process.stdout.readline().strip()
            yield line

    def begin_to_tail(self):
        try:
            for line in self.log_lines_generator():
                self.producer.send_messages(self.topic, line)
        except KeyboardInterrupt:
            pass
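A minimal usage sketch for the class above, assuming the legacy kafka-python API used throughout these examples; the host, port, topic, and log file are placeholder values.

from kafka import SimpleProducer, KafkaClient
import subprocess

# Hypothetical values -- adjust broker address, topic and log path as needed
tailer = Tail2kafka(host='localhost', port='9092', topic='logs', logfile='/var/log/app.log')
tailer.begin_to_tail()  # blocks, forwarding each new log line to Kafka until Ctrl-C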
Example #2
class DmsKafkaClient(object):
    def __init__(self):
        config = ServiceContext().getConfigService()
        broker_list = config.get("Message", "kafka_producer")
        kafka = KafkaClient(broker_list)
        self.producer = SimpleProducer(kafka)
        self.zabbix_alert = config.get("Message", "zabbix_alert_topic")

    def sendPackageTimeout(self, accountId):
        message = {
            "accountId": accountId,
            "host": None,
            "item": None,
            "severity": "ERROR",
            "description": "account %s workflow timeout" % accountId
        }
        all = {
            "timestamp": 1L,
            "src": "rundeck",
            "host_ip": "10.74.113.101",
            "rawdata": json.dumps(message)
        }
        schema = avro.schema.parse(avro_schema)
        writer = avro.io.DatumWriter(schema)
        bytes_writer = io.BytesIO()
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write(all, encoder)
        try:
            self.producer.send_messages(b"%s" % self.zabbix_alert,
                                        bytes_writer.getvalue())
            logger.info("sent to zabbix sa successfully")
        except Exception:
            logger.error(
                "error sending package timeout message to zabbix alert topic"
            )
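A sketch of the decoding counterpart, assuming the same avro_schema definition is available on the consumer side; it mirrors the avro.io calls above in reverse.

import io
import avro.schema
import avro.io

def decode_alert(raw_bytes, avro_schema):
    # Reverse of the encoding above: BinaryDecoder + DatumReader
    schema = avro.schema.parse(avro_schema)
    reader = avro.io.DatumReader(schema)
    decoder = avro.io.BinaryDecoder(io.BytesIO(raw_bytes))
    return reader.read(decoder)  # the dict written by DatumWriter, with rawdata still JSON-encoded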
Example #3
    def on_get(self, req, res):
        global connection, current_table

        # connection = happybase.Connection(config.get('default', 'HBASE_HOST'))
        connection.open()
        current_table = connection.table('userscore')

        req_data = {
            'pclass': req.get_param('pclass'),
            'age': req.get_param('age'),
            'sex': req.get_param('sex'),
            'fare': req.get_param('fare')
        }

        #score = req.get_param('score') or 0
        #print '==============', candidate_id
        result = current_table.row(req_data['fare'])

        if not result:
            #request_json = { "name": candidate_id, "score": score }
            producer = SimpleProducer(kafka)
            producer.send_messages('userscore', json.dumps(req_data))

            n = 3
            while n >= 0:
                result = current_table.row(req_data['fare'])
                if not result:
                    time.sleep(1)
                    n -= 1
                else:
                    break

        connection.close()
        res.body = json.dumps(result)
Example #4
def trigger():
    kafka = KafkaClient('localhost:9092')

    producer = SimpleProducer(kafka)

    r = requests.get(
        "http://fintech.dataapplab.com:33334/api/v1.0/FinTech/streamingdata")
    print r
    producer.send_messages('fintech-lendingclub', r.content)
    data = {
        "bc_open_to_buy": 0,
        "total_il_high_credit_limit": 0,
        "dti": 2009,
        "annual_inc": 12000,
        "bc_util": 0,
        "int_rate": 10.08,
        "term": 36,
        "loan_amnt": 3500,
        "fund_rate": 0,
        "funded_amnt": 3500,
    }
    #for line in r.iter_lines():
    producer.send_messages('fintech-lendingclub', json.dumps(data))
    kafka.close()
    return "success"
Example #6
class WeatherProducer():
    def __init__(self):
        self.api_url = 'http://api.openweathermap.org/data/2.5/weather/'
        self.kafka = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(self.kafka)

    def get_curr_weather(self):
        self.params = {'q': 'new york', 'appid': API_KEY}
        r = requests.get(url = self.api_url, params = self.params)
        data = r.json()

        timestamp = datetime.datetime.now()
        data['timestamp'] = timestamp

        return data

    def myconverter(self, o):
        if isinstance(o, datetime.datetime):
            return o.__str__()
    def send_to_kafka(self):
        data = self.get_curr_weather()
        try:
            self.producer.send_messages('weather', json.dumps(data, default=self.myconverter).encode('utf-8'))
            print("Successfully sent to kafka")
        except BaseException as e:
            print("Error on_data %s" % str(e))
class TweeterStreamListener(tweepy.StreamListener):
    """ A class to read the twitter stream and push it to Kafka"""

    def __init__(self, api):
        self.api = api
        super(TweeterStreamListener, self).__init__(api)
        client = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(client, async=True,
                                       batch_send_every_n=1000,
                                       batch_send_every_t=10)

    def on_status(self, status):
        """ This method is called whenever new data arrives from live stream.
        We asynchronously push this data to kafka queue"""
        msg = status.text.encode('utf-8')
        #print(msg)
        try:
            self.producer.send_messages(b'twitterstream', msg)
        except Exception as e:
            print(e)
            return False
        return True

    def on_error(self, status_code):
        print( status_code )
        print("Error received in kafka producer")
        return True # Don't kill the stream

    def on_timeout(self):
        return True # Don't kill the stream
class TweeterStreamProducer():
    """ A class to read the tweet stream and push it to Kafka"""

    def __init__(self):
        client = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(client, async_send = True,
                          batch_send_every_n = 1000,
                          batch_send_every_t = 10)

    def on_status(self, status):
        """ This method is called whenever new data arrives from live stream.
        We asynchronously push this data to kafka queue"""
        msg = status
        #print(msg)
        try:
            self.producer.send_messages('twitterstream', msg.encode('utf-8'))
        except Exception as e:
            print(e)
            return False
        return True

    def on_error(self, status_code):
        print("Error received in kafka producer")
        return True # Don't kill the stream

    def on_timeout(self):
        return True # Don't kill the stream
    def test_simple_producer(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = SimpleProducer(self.client, random_start=False)

        # Goes to first partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("one"),
                                      self.msg("two"))
        self.assert_produce_response(resp, start_offsets[0])

        # Goes to the next partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("three"))
        self.assert_produce_response(resp, start_offsets[1])

        self.assert_fetch_offset(
            partitions[0], start_offsets[0],
            [self.msg("one"), self.msg("two")])
        self.assert_fetch_offset(partitions[1], start_offsets[1],
                                 [self.msg("three")])

        # Goes back to the first partition because there's only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"),
                                      self.msg("five"))
        self.assert_produce_response(resp, start_offsets[0] + 2)
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("four"),
            self.msg("five")
        ])

        producer.stop()
Example #10
def send_kafka():
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)

    while True:
        producer.send_messages("data", b'data data data')
        producer.send_messages("weights", b'8.46,1.74,6.08,4.25,1.92')
Example #11
def get_files():
    kafka = KafkaClient("129.16.125.231:9092")
    producer = SimpleProducer(kafka)
    topic = 'test'

    for root, dirs, files in os.walk(
            '/mnt/volume/fromAl/Data_20151215 HepG2 LNP size exp live cell 24h_20151215_110422/AssayPlate_NUNC_#165305-1/'
    ):
        if not files:
            print("files is empty")
        else:
            print("In else")
            print("root: ", root)
            print("dirs: ", dirs)
            print("files[0]: ", files[0])
            if not dirs:
                print("dirs is empty")
            #          else:
            print(
                '/mnt/volume/fromAl/Data_20151215 HepG2 LNP size exp live cell '
                '24h_20151215_110422/AssayPlate_NUNC_#165305-1/' + files[0])
            for i in range(len(files)):
                img = cv2.imread(
                    '/mnt/volume/fromAl/Data_20151215 HepG2 LNP size exp live cell '
                    '24h_20151215_110422/AssayPlate_NUNC_#165305-1/' +
                    files[i])
                ret, jpeg = cv2.imencode('.png', img)
                producer.send_messages(topic, jpeg.tobytes())
    kafka.close()
Example #12
class KafkaMessageAdapterPreHourly(MessageAdapter):

    adapter_impl = None

    def __init__(self):
        client_for_writing = KafkaClient(cfg.CONF.messaging.brokers)
        self.producer = SimpleProducer(client_for_writing)
        self.topic = cfg.CONF.messaging.topic_pre_hourly

    @staticmethod
    def init():
        # object to keep track of offsets
        KafkaMessageAdapterPreHourly.adapter_impl = simport.load(
            cfg.CONF.messaging.adapter_pre_hourly)()

    def do_send_metric(self, metric):
        self.producer.send_messages(
            self.topic,
            json.dumps(metric, separators=(',', ':')))
        return

    @staticmethod
    def send_metric(metric):
        if not KafkaMessageAdapterPreHourly.adapter_impl:
            KafkaMessageAdapterPreHourly.init()
        KafkaMessageAdapterPreHourly.adapter_impl.do_send_metric(metric)
Example #13
def fetchFrom():
    in_kafka = KafkaClient(IN_KAFKA_HOST)
    consumer = SimpleConsumer(in_kafka, 'trending', CONSUMER_TOPIC, max_buffer_size=20*1024*1024)
    out_kafka = KafkaClient(OUT_KAFKA_HOST)
    producer = SimpleProducer(out_kafka)

    for msg in consumer:
        record = json.loads(msg.message.value)
        if 'tags' in record and '_trends' in record['tags']:
            try:
                producer.send_messages("trends", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
            continue
        if 'metadata' in record:
            print(record['metadata'])
        if 'metadata' in record and 'tags' in record['metadata'] and '_channels' in record['metadata']['tags']:
            try:
                producer.send_messages("channels", msg.message.value)
                print(str(time.strftime("%c")) + " pump url " + record['inlink'].encode('utf-8'))
            except MessageSizeTooLargeError as err:
                logging.warning(err)
            continue
    in_kafka.close()
    out_kafka.close()
Example #14
def report(timestamp, vin, data):
    """
    Log the location record
    """
    conf = get_settings()

    kafka = None
    logger.info('Kafka MQ Server: Report Request: Time: %s, VIN: %s, Data: %s.', timestamp, vin, data)
    payload = {}
    payload['timestamp'] = timestamp
    payload['vin'] = vin
    payload['data'] = data

    # Connect to Kafka Message Queue Server
    try:
        kafka = KafkaClient(conf['TRACKING_MQ_URL'])
    except Exception:
        logger.error("%s: Kafka Message Queue Server unavailable.", conf['TRACKING_MQ_URL'])
        return False

    producer = SimpleProducer(kafka)
    producer.send_messages(conf['TRACKING_MQ_TOPIC'], json.dumps(payload))
    logger.info("%s: Report data published to message queue.", conf['TRACKING_MQ_URL'])
    return True
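A hypothetical call, assuming get_settings() supplies TRACKING_MQ_URL and TRACKING_MQ_TOPIC:

ok = report(int(time.time()), 'VIN12345', {'lat': 37.77, 'lon': -122.42})  # placeholder VIN and payload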
Example #15
def asynchronous_mode():
    '''Asynchronous Mode'''
    from kafka import SimpleProducer, KafkaClient
    import logging

    # To send messages asynchronously
    kafka = KafkaClient(KAFKA_SERVER)
    producer = SimpleProducer(kafka, async=True)
    producer.send_messages(b'topic1', b'async message')

    # To wait for acknowledgements
    # ACK_AFTER_LOCAL_WRITE : server will wait till the data is written to
    #                         a local log before sending response
    # ACK_AFTER_CLUSTER_COMMIT : server will block until the message is committed
    #                            by all in sync replicas before sending a response
    producer = SimpleProducer(kafka, async=False,
                              req_acks=SimpleProducer.ACK_AFTER_LOCAL_WRITE,
                              ack_timeout=2000,
                              sync_fail_on_error=False)

    responses = producer.send_messages(b'topic1', b'another message')
    for response in responses:
        logging.info(response.offset)

    # To send messages in batch. You can use any of the available
    # producers for doing this. The following producer will collect
    # messages in batch and send them to Kafka after 20 messages are
    # collected or every 60 seconds
    # Notes:
    # * If the producer dies before the messages are sent, there will be losses
    # * Call producer.stop() to send the messages and cleanup
    producer = SimpleProducer(kafka, async=True,
                              batch_send_every_n=20,
                              batch_send_every_t=60)
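Since the batching producer is asynchronous, buffered messages are only guaranteed to go out once the producer is stopped; a short follow-up sketch using the same producer and kafka objects:

    producer.send_messages(b'topic1', b'batched message')
    # stop() flushes queued messages and joins the background send thread
    producer.stop()
    kafka.close()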
Example #16
def test_kafka(request):
    KAFKA_BROKER_URL    = "127.0.0.1:9092"
    KAFKA_TIMEOUT       = 5
    kafka_client = KafkaClient(KAFKA_BROKER_URL, timeout = KAFKA_TIMEOUT)
    kafka_producer = SimpleProducer(kafka_client)
    kafka_producer.send_messages("dummy_empty", "")
    return HttpResponse('', content_type="application/json")
Example #17
class KafkaMetricSender(MetricSender):
    def __init__(self, config):
        super(KafkaMetricSender, self).__init__(config)
        kafka_config = config["output"]["kafka"]
        # default topic
        # self.topic = kafka_config["topic"].encode('utf-8')
        # producer
        self.broker_list = kafka_config["brokerList"]
        self.kafka_client = None
        self.kafka_producer = None

    def open(self):
        self.kafka_client = KafkaClient(self.broker_list, timeout=59)
        self.kafka_producer = SimpleProducer(self.kafka_client,
                                             batch_send=True,
                                             batch_send_every_n=500,
                                             batch_send_every_t=30)

    def send(self, msg, topic):
        self.kafka_producer.send_messages(topic, json.dumps(msg))

    def close(self):
        if self.kafka_producer is not None:
            self.kafka_producer.stop()
        if self.kafka_client is not None:
            self.kafka_client.close()
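A hypothetical driver for the sender above; the config shape matches what __init__ reads, with placeholder broker and topic names, and it assumes the MetricSender base class simply stores the config.

config = {"output": {"kafka": {"brokerList": "localhost:9092"}}}  # minimal assumed config shape

sender = KafkaMetricSender(config)
sender.open()
sender.send({"metric": "cpu.load", "value": 0.42}, "metrics")  # example payload and topic
sender.close()  # stops the batching producer and closes the client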
Example #18
    def populate(self, auth_info, stmt_data, payload_sha2s):
        if self.__class__.__name__ == 'StatementManager':
            stmt_data['voided'] = False

        self.build_verb(stmt_data)
        self.build_statement_object(auth_info, stmt_data)
        stmt_data['actor'] = Agent.objects.retrieve_or_create(
            **stmt_data['actor'])[0]
        self.build_context(stmt_data)
        self.build_result(stmt_data)
        # Substatement could not have timestamp
        if 'timestamp' in stmt_data:
            stmt_data['timestamp'] = convert_to_datetime_object(
                stmt_data['timestamp'])
        attachment_data = stmt_data.pop('attachments', None)

        if self.__class__.__name__ == 'StatementManager':
            # Save statement/substatement
            self.model_object = self.build_statement(auth_info, stmt_data)
        else:
            self.model_object = self.build_substatement(auth_info, stmt_data)
        try:
            kafka = SimpleClient(KAFKA_HOST)
            producer = SimpleProducer(kafka)
            producer.send_messages("StatementManager", str(self.model_object))
            kafka.close()
        except Exception:
            print('error with StatementManager Kafka')
        if attachment_data:
            self.build_attachments(auth_info, attachment_data, payload_sha2s)
Example #19
def main():
    ## Pass the kafka_url, e.g. `192.168.1.110:9092`
    kafka_url = sys.argv[1]

    ## Register to read messages from the "rousseau" list
    consumer = KafkaConsumer('rousseau',
                             group_id='my_group',
                             bootstrap_servers=[kafka_url])

    ## Register to send to the rousseau-chain channel
    kafka = KafkaClient(kafka_url)
    producer = SimpleProducer(kafka)

    # Initialize a chain backed by 2 disk files
    c = chain(diskHashList("fentries.dat"), diskHashList("fnodes.dat"))

    ## The main event loop
    for message in consumer:
        # message value is raw byte string -- decode if necessary!
        # e.g., for unicode: `message.value.decode('utf-8')`
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value))

        seq = c.add(message.value)
        response = "%s|%s|%s" % (seq, hexlify(c.head()), message.value)
        print (response)

        # Note that the application is responsible for encoding messages to type bytes
        producer.send_messages(b'rousseau-chain', response)
Example #20
def get_weather(sc):

    # To send messages synchronously
    kafka = KafkaClient('localhost:9092')
    producer = SimpleProducer(kafka)

    countriesArray = ["Singapore", "Chicago", "Madrid", "Beijing"]

    for country in countriesArray:

        # Call Weather API to get forecasts
        response = urllib2.urlopen(
            'http://api.openweathermap.org/data/2.5/weather?q=' + country +
            '&appid=' + WEATHER_API_APPID)
        data = json.load(response)

        countryDataDict = {}
        countryDataDict["city"] = data["name"]
        countryDataDict["country"] = data["sys"]["country"]
        countryDataDict["timestamp"] = data["dt"]
        countryDataDict["wind_speed"] = data["wind"]["speed"]
        countryDataDict["visibility"] = data["visibility"]
        countryDataDict["weather"] = data["weather"]
        countryDataDict["main"] = data["main"]

        # Need to convert dict to bytes before sending to kafka
        bytesData = json.dumps(countryDataDict)

        producer.send_messages(
            b'weather',
            b'Weather data for ' + country + ' at ' + str(data["dt"]))
        producer.send_messages(b'weather', bytesData)
        print("Weather data for " + country + " sent to Kafka..")

    sc.enter(300, 1, get_weather, (sc, ))
Example #21
def takePicture():
    imagePath = '/tmp/image.jpg'
    try:
        os.remove(imagePath)
    except OSError:
        pass

    subprocess.call("chdkptp -ec -e\"rec\" -e\"rs %s\"" % (imagePath[:-4]),
                    shell=True)

    if not os.path.isfile(imagePath):
        logging.warn("Error during taking picture")
        return

    with open(imagePath, "rb") as imageFile:
        imageEncoded = base64.b64encode(imageFile.read())

        upload = {
            'id': str(uuid.uuid4()),
            'picture': imageEncoded,
            'takenTime': int(time.time()),
            'ride': 'cam2',
        }
    data = json.dumps(upload)
    logging.info("Message size %d" % len(data))

    kafka = KafkaClient(kafka_server)
    producer = SimpleProducer(kafka)
    producer.send_messages(b'pictures', data)
Example #23
def create_note(request):
    if request.method != "POST":
        return _error_response(request, "must make POST request")
    if "authenticator" not in request.POST or "title" not in request.POST or "details" not in request.POST:
        return _error_response(request, "missing fields")

    values = {
        "authenticator": request.POST["authenticator"],
        "title": request.POST["title"],
        "details": request.POST["details"],
    }
    data = urllib.parse.urlencode(values).encode("utf-8")

    req = urllib.request.Request("http://models:8000/api/v1/note/create", data=data, method="POST")
    resp_json = urllib.request.urlopen(req).read().decode("utf-8")
    resp = json.loads(resp_json)
    if resp["ok"] is True:
        kafka = KafkaClient("kafka:9092")
        producer = SimpleProducer(kafka)
        note_new_listing = {
            "title": request.POST["title"],
            "details": request.POST["details"],
            "id": resp["resp"]["id"],
        }
        producer.send_messages(b"note-listings-topic", json.dumps(note_new_listing).encode("utf-8"))
        # es_add is a temporary helper function adding listing to ES directly without working with kafka
        es_add_note_listing(request, resp["resp"]["id"], resp["resp"]["username"])
        return _success_response(request, resp["resp"])
    else:
        return _error_response(request, resp["error"])
Example #24
def get(address=u'', lat=0, lon=0, radius=0):
    # streetAddress = '1600 Pennsylvania Ave, Washington, DC'
    client_id = '8728ec7ee9424eb4aae9d45107ee6481'

    resolvedAddress = lambda: None

    if len(address) > 0:
        resolvedAddress.__dict__ = getAddressLatLon(address)
        lat = float(resolvedAddress.lat)
        lon = float(resolvedAddress.lon)
        radius = 1000

    now = datetime.now()
    sixHoursEarlier = now - timedelta(hours=1)
    instagramReturnTuple = doInstagramMagic(resolvedAddress, client_id,
                                            sixHoursEarlier, now, lat, lon,
                                            radius)
    instagramJson = instagramReturnTuple[0]
    resolvedAddress.imageCount = instagramReturnTuple[1]

    topic = 'qpr.geogram'
    host = 'k01.istresearch.com'
    port = 9092

    kafka = KafkaClient("%s:%i" % (host, port))
    producer = SimpleProducer(kafka)

    message = json.dumps({"message": instagramJson})

    producer.send_messages(topic, message)

    return json.dumps(resolvedAddress.__dict__)
Example #25
def fetchFrom():
    in_kafka = KafkaClient('172.31.10.154:9092')
    consumer = SimpleConsumer(in_kafka, 'fetcher', 'cpp.pages', max_buffer_size=20*1024*1024)
    out_kafka = KafkaClient("172.31.1.70:9092")
    producer = SimpleProducer(out_kafka)

    for msg in consumer:
        page = json.loads(msg.message.value)
        if 'retweet' in page['meta']:
            print "remove twitter page"
            continue
        output = {}
        output['inlink']=''
        output['level']=1
        output['url']=page['url']
        output['fts']=page['ts_fetch']
        output['content']=page['content']
        try:
            producer.send_messages("process", json.dumps(output))
            print(str(time.time()) + " pump url " + output['url'].encode('utf-8'))
        except MessageSizeTooLargeError as err:
            logging.warning(err)

    in_kafka.close()
    out_kafka.close()
def genData(topic):
	producer = SimpleProducer(kafka, async=True)
	with open(source_file) as f:
		for line in f:
			print line
			jd = json.dumps(line)
			producer.send_messages(topic, line.encode('utf-8'))
Example #29
def video_emitter(video_file, topic, producer_port=9092):

    # Make sure the video file exists
    assert os.path.isfile(video_file), "Video does not exist"

    # Create a producer
    kafka = SimpleClient("localhost:%d" % producer_port)
    producer = SimpleProducer(kafka)
    logger.info("Kafka procuder created")

    # Open the video
    video = cv2.VideoCapture(video_file)
    logger.info("Emitting...")
    cnt = 1  # Count the frames
    while video.isOpened():
        success, image = video.read()
        if not success:  # check if the file has read to the end
            break
        ret, jpeg = cv2.imencode(".png", image)
        logger.info("Successfully read one video frame as png. Frame count = %d" % cnt)

        # Convert the image to bytes and send to kafka
        producer.send_messages(topic, jpeg.tobytes())
        time.sleep(0.2)  # To reduce CPU usage
        logger.info("Successfully send the video frame into the producer.")
        cnt += 1
    video.release()
    logger.info("Finished with emitting")
class listener(tweepy.StreamListener):
	def __init__(self):
		client = KafkaClient("localhost:9092")
		try:
			self.producer = SimpleProducer(client, async=True,
			                               batch_send_every_n=1000,
			                               batch_send_every_t=10)
			print 'Initialised'
		except Exception as e:
			print 'failed:', str(e)
	def on_data(self, data):
		#try:
		try:
			jsondata=json.loads(data)
			print jsondata
			self.producer.send_messages('trumpstream', str(data))
			db.trumpdb.insert(jsondata)
			return True
		except TypeError as e:
			print 'TypeError:',str(e)
			time.sleep(5)
			pass

	def on_error(self, status):

		print status
Example #31
    def test_simple_producer(self):
        start_offset0 = self.current_offset(self.topic, 0)
        start_offset1 = self.current_offset(self.topic, 1)
        producer = SimpleProducer(self.client)

        # Goes to first partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("one"),
                                      self.msg("two"))
        self.assert_produce_response(resp, start_offset0)

        # Goes to the next partition, randomly.
        resp = producer.send_messages(self.topic, self.msg("three"))
        self.assert_produce_response(resp, start_offset1)

        self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("one"), self.msg("two")])
        self.assert_fetch_offset(1, start_offset1, [self.msg("three")])

        # Goes back to the first partition because there's only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"),
                                      self.msg("five"))
        self.assert_produce_response(resp, start_offset0 + 2)
        self.assert_fetch_offset(0, start_offset0, [
            self.msg("one"),
            self.msg("two"),
            self.msg("four"),
            self.msg("five")
        ])

        producer.stop()
Example #33
    def test_producer_sync_fail_on_error(self):
        error = FailedPayloadsError('failure')
        with patch.object(SimpleClient, 'load_metadata_for_topics'):
            with patch.object(SimpleClient, 'ensure_topic_exists'):
                with patch.object(SimpleClient,
                                  'get_partition_ids_for_topic',
                                  return_value=[0, 1]):
                    with patch.object(SimpleClient,
                                      '_send_broker_aware_request',
                                      return_value=[error]):

                        client = SimpleClient(MagicMock())
                        producer = SimpleProducer(client,
                                                  async_send=False,
                                                  sync_fail_on_error=False)

                        # This should not raise
                        (response, ) = producer.send_messages(
                            'foobar', b'test message')
                        self.assertEqual(response, error)

                        producer = SimpleProducer(client,
                                                  async_send=False,
                                                  sync_fail_on_error=True)
                        with self.assertRaises(FailedPayloadsError):
                            producer.send_messages('foobar', b'test message')
Example #34
def send_json(data):
    kafka = KafkaClient('localhost:9092')

    # Connect to Kafka and send json
    producer = SimpleProducer(kafka)

    producer.send_messages(topic, data)
Example #36
def sendTweets(file, topic):
    data = pd.read_csv(file)  #reads a csv file of tweets
    df = pd.DataFrame(data)

    df_ids = df['id']  #data frame of only the tweet-id column in the csv

    for tweetid in df_ids:
        try:
            tweet_id = str(tweetid)
            kafka = KafkaClient("localhost:9092")  #initializes kafka client
            #SimpleProducer has no value_serializer option; messages are utf-8 encoded explicitly below
            producer = SimpleProducer(kafka)  #initializes kafka producer

            producer.send_messages(
                (str(topic)), tweet_id.encode('utf-8')
            )  #sends tweet ids for the given topic to a message queue
            print('Sending Tweet to Mempool!')
            print('Received at Mempool!')

            time.sleep(2)

        except tweepy.TweepError:
            time.sleep(60 * 15)
            print('this messed up')
            continue
    print("Tweets Delivered to Mempool")
Example #37
class StdOutListener(StreamListener):
    def __init__(self):
        self.kafka = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(self.kafka)

        self.api_url = 'http://api.openweathermap.org/data/2.5/weather/'

    def get_curr_weather(self):
        self.params = {'q': 'new york', 'appid': WEATHER_KEY}
        r = requests.get(url=self.api_url, params=self.params)
        data = r.json()

        return data

    def on_data(self, data):
        try:
            curr_weather = self.get_curr_weather()
            curr_tweet = json.loads(data)
            message = {}
            message.update(curr_weather)
            message.update(curr_tweet)
            data = json.dumps(message)
            self.producer.send_messages('tweet-weather', data.encode('utf-8'))
            print("Successfully sent message to kafka")
        except BaseException as e:
            print("Error on_data %s" % str(e))
        return True

    def on_error(self, status):
        print(status)
async def getAccounts(base_topic):
    async for account in base_topic:
        print(account)
        t_account_topic = account['handle']
        kafka = KafkaClient("localhost:9092")  #initializes kafka client
        #SimpleProducer has no value_serializer option; the message is utf-8 encoded explicitly
        producer = SimpleProducer(kafka)  #initializes kafka producer
        producer.send_messages('IndividualAccounts',
                               t_account_topic.encode('utf-8'))
        account_topic = app.topic(str(t_account_topic),
                                  value_serializer='json')
        account_topic.stream()
        print('New Stream Made')

        topic = account['handle']
        chain_id = str(account['chainid'])
        twitter_id = str(account['twitterid'])

        Stream_Listener = StreamListener()  #Turns Stream Listener Class On
        Stream_Listener.field_load(twitter_id, chain_id, topic)

        try:
            api = getTwitterCredentials(
                TWITTER_KEY, TWITTER_SECRET, TWITTER_APP_KEY,
                TWITTER_APP_SECRET)  #authorize api credentials
            stream = tweepy.Stream(auth=api.auth,
                                   listener=Stream_Listener)  #create a stream for the account
            stream.filter(
                follow=[str(twitter_id)], is_async=True
            )  #listens to twitter account and triggers for only the account's tweets

        except Exception as ex:  #error handling to restart streamer in the event of it stopping for things like Rate Limit Error
            print("[STREAM] Stream stopped! Reconnecting to twitter stream")
            print(ex)
            stream.filter(follow=[str(twitter_id)])

        new_file = tweetFetcher(
            topic
        )  #creates a csv file of the accounts most recent tweets and filters it
        cwd = os.getcwd()
        for file in os.listdir(cwd):
            if file.startswith(topic):
                sendTweets(file, str(topic))  #writes tweets to factom the csv
        print('Done')
        print('waiting to Factomize')
        consumer = KafkaConsumer(str(topic),
                                 auto_offset_reset='earliest',
                                 bootstrap_servers=['localhost:9092'],
                                 consumer_timeout_ms=1000)

        for message in consumer:
            raw_tweetid = message.value
            tweetid = raw_tweetid.decode("utf-8")
            factomizeTweets((tweetid), (chain_id))  #Write tweet to Factom
            print(str(tweetid) + ' Success!')
            time.sleep(10)

        consumer.close()
Example #39
class TwitterStreamListener(StreamListener):
    def __init__(self, api=None):
        #connect to the kafka broker
        #need to handle error
        self.topic = "tweet"
        self.kafka = KafkaClient("localhost:9092")
        self.producer = SimpleProducer(self.kafka)

    def on_data(self, data):
        if 'in_reply_to_status' in data:
            self.on_status(data)
        return True

    def on_status(self, data):
        tweet = json.loads(data)
        text = tweet.get('text', ' ')
        coord = tweet.get('coordinates', None)
        created_at = tweet.get('created_at', " ")
        id = tweet.get('id', ' ')
        lang = tweet.get('lang', ' ')
        user = tweet.get('user', "user")
        timestamp = tweet.get('timestamp_ms', " ")
        timestamp = arrow.get(timestamp)
        text = re.sub(r'\W+', ' ', text)
        lon, lat = "", ""
        print tweet.keys()
        if coord:
            lon = coord['coordinates'][0]
            lat = coord['coordinates'][1]

        tweet_csv = "{id}, {created_at}, {timestamp},{lang}, {lon}, {lat},{text},0".format(
            id=id,
            created_at=created_at,
            timestamp=timestamp,
            lang=lang,
            lon=lon,
            lat=lat,
            text=text)

        if lang == 'en':
            print tweet_csv
            self.producer.send_messages(self.topic, tweet_csv)
        else:
            print "not english"
            print tweet_csv
        return

    def on_limit(self, track):
        sys.stderr.write(track + "\n")
        return

    def on_error(self, status_code):
        sys.stderr.write('Error: ' + str(status_code) + "\n")
        return False

    def on_timeout(self):
        sys.stderr.write("Timeout, sleeping for 60 seconds...\n")
        time.sleep(60)
        return
Example #40
    def test_produce__new_topic_fails_with_reasonable_error(self):
        new_topic = 'new_topic_{guid}'.format(guid = str(uuid.uuid4())).encode('utf-8')
        producer = SimpleProducer(self.client, random_start=False)

        # At first it doesn't exist
        with self.assertRaises((UnknownTopicOrPartitionError,
                                LeaderNotAvailableError)):
            producer.send_messages(new_topic, self.msg("one"))
Example #41
    def test_producer_random_order(self):
        producer = SimpleProducer(self.client, random_start=True)
        resp1 = producer.send_messages(self.topic, self.msg("one"), self.msg("two"))
        resp2 = producer.send_messages(self.topic, self.msg("three"))
        resp3 = producer.send_messages(self.topic, self.msg("four"), self.msg("five"))

        self.assertEqual(resp1[0].partition, resp3[0].partition)
        self.assertNotEqual(resp1[0].partition, resp2[0].partition)
def send_kafka_msg(iters):
    # TODO: Add try/catch statements for kafka connection
    kafka = KafkaClient(kafka_host)
    producer = SimpleProducer(kafka)
    for key, val in iters:
        msg = combine_count_json(key, val)
        producer.send_messages(str(topic).encode("utf-8"), str(msg).encode("utf-8"))
    kafka.close()
    def genData(self):
        with open(self.source_file) as f:
            reader = csv.DictReader(f)
            crimeLocations = list(reader)

        kafka_cluster = self.config['kafka_cluster']

        print "kafka_cluster is:" + kafka_cluster + " done"
        kafka_client = KafkaClient(kafka_cluster)
        kafka_producer = SimpleProducer(kafka_client)

        # To send messages synchronously
        #kafkaSimple = KafkaClient('52.10.17.219:9092')
        #producerSimple = SimpleProducer(kafkaSimple, async=True)
        #geolocator = Nominatim()
        count = 0
        while True:
        #while (count < 5):
            for loc in crimeLocations:
                userID = loc["userID"]
                userName = loc["userName"]

                '''
                #date_rptd = loc["date_rptd"]
                date_rptd = str(datetime.datetime.now().month) + "/" + str(datetime.datetime.now().day) + "/" + str(datetime.datetime.now().year)
                #time_rptd = loc["time_rptd"]
                time_rptd = str(datetime.datetime.now().hour).zfill(2) + str(datetime.datetime.now().minute).zfill(2)
                #dateTemp = datetime.datetime.strptime(date_rptd_raw, '%m/%d/%y').strftime('%Y-%m-%d')

                locationObj = ""
                #timestamp
                '''

                latitude = float(loc['latitude'])
                longitude = float(loc['longitude'])
                msg = {}
                msg['userID'] = userID
                msg['userName'] = userName
                location = {
                    'latitude': latitude,
                    'longitude': longitude
                }

                msg['location'] = location

                #time.sleep(10)
                kafka_producer.send_messages(self.topic, json.dumps(msg))
                #time.sleep(10)
                #producerSimple.send_messages(self.topic, json.dumps(msg))
                #producerSimple.send_messages('crimeLocation1', 'tajmessage1')

                print "sending location update for user %s" % userID
            count += 1

            print "+++++++++++++FINISH ROUND %d+++++++++++++++++" % count
def timeline_producer(twitter_account, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.user_timeline(twitter_account, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #47
class KafkaProducer:

	def __init__(self):
		kafkahandle = KafkaClient("localhost:9092")
		self.producer = SimpleProducer(kafkahandle)

	def kafka_producer(self, topicname='harish_t', message=None):
		#default arguments are evaluated once at definition time, so take the timestamp per call
		if message is None:
			message = time.time()
		_msg = str(message)  #converting to string explicitly since kafka expects string
		self.producer.send_messages(topicname, _msg)
Example #48
    def test_batched_simple_producer__triggers_by_message(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        # Configure batch producer
        batch_messages = 5
        batch_interval = 5
        producer = SimpleProducer(self.client,
                                  batch_send=True,
                                  batch_send_every_n=batch_messages,
                                  batch_send_every_t=batch_interval,
                                  random_start=False)

        # Send 4 messages -- should not trigger a batch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        # send 3 more messages -- should trigger batch on first 5
        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # send messages groups all *msgs in a single call to the same partition
        # so we should see all messages from the first call in one partition
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        # Because we are batching every 5 messages, we should only see one
        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
        ])

        producer.stop()
def query_location_producer(lat, lng, radius, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.area_search(lat, lng, radius, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
def query_text_producer(text, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.search(text, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #52
class KafkaSender(LogSender):
    def __init__(self, config, msg_buffer, stats):
        LogSender.__init__(self, config=config, msg_buffer=msg_buffer, stats=stats,
                           max_send_interval=config.get("max_send_interval", 0.3))
        self.config = config
        self.msg_buffer = msg_buffer
        self.stats = stats

        self.kafka = None
        self.kafka_producer = None

        topic = self.config["kafka_topic"]
        if not isinstance(topic, bytes):
            topic = topic.encode("utf8")
        self.topic = topic

    def _init_kafka(self):
        self.log.info("Initializing Kafka client, address: %r", self.config["kafka_address"])
        while self.running:
            try:
                if self.kafka_producer:
                    self.kafka_producer.stop()
                if self.kafka:
                    self.kafka.close()

                self.kafka = KafkaClient(  # pylint: disable=unexpected-keyword-arg
                    self.config["kafka_address"],
                    ssl=self.config.get("ssl", False),
                    certfile=self.config.get("certfile"),
                    keyfile=self.config.get("keyfile"),
                    ca=self.config.get("ca")
                )
                self.kafka_producer = SimpleProducer(self.kafka, codec=CODEC_SNAPPY
                                                     if snappy else CODEC_NONE)
                self.log.info("Initialized Kafka Client, address: %r", self.config["kafka_address"])
                break
            except KAFKA_CONN_ERRORS as ex:
                self.log.warning("Retriable error during Kafka initialization: %s: %s, sleeping",
                                 ex.__class__.__name__, ex)
            self.kafka = None
            self.kafka_producer = None
            time.sleep(5.0)

    def send_messages(self, message_batch):
        if not self.kafka:
            self._init_kafka()
        try:
            self.kafka_producer.send_messages(self.topic, *message_batch)
            return True
        except KAFKA_CONN_ERRORS as ex:
            self.log.info("Kafka retriable error during send: %s: %s, waiting", ex.__class__.__name__, ex)
            time.sleep(0.5)
            self._init_kafka()
        except Exception as ex:  # pylint: disable=broad-except
            self.log.exception("Unexpected exception during send to kafka")
            self.stats.unexpected_exception(ex=ex, where="sender", tags={"app": "journalpump"})
            time.sleep(5.0)
            self._init_kafka()
Example #53
def sendMsgToKafka(obj, msg):
    #msg=msg
    #obj=obj
    client = KafkaClient("c9t26359.itcs.hpecorp.net:9092")  #
    producer = SimpleProducer(client)
    producer.send_messages(obj, msg)
    #response=producer.send_messages(obj,msg)
    #print response
    client.close()
def favorite_list_producer(id, count):
    count = int(count)
    kafka = KafkaClient("localhost:9092")
    kafka_producer = SimpleProducer(kafka)
    text_list = twitter_api.favorite_list(id, count)
    for text in text_list:
        kafka_producer.send_messages("twitter",text)
    kafka.close()
    return
Example #56
class MessageService:

    def __init__(self, kafkaBroker, kafkaTopic):
        self.broker = kafkaBroker
        self.topic = kafkaTopic
        self.client = KafkaClient(self.broker)
        self.producer = SimpleProducer(self.client)

    def sendMessage(self, message):
        self.producer.send_messages(self.topic, message)
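Usage is one call per message; the broker and topic below are placeholders.

service = MessageService("localhost:9092", "events")  # hypothetical broker and topic
service.sendMessage(b"hello from MessageService")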
Example #57
    def test_batched_simple_producer__triggers_by_time(self):
        self.skipTest("Flakey test -- should be refactored or removed")
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [self.current_offset(self.topic, p) for p in partitions]

        batch_interval = 5
        producer = SimpleProducer(
            self.client,
            async_send=True,
            batch_send_every_n=100,
            batch_send_every_t=batch_interval,
            random_start=False)

        # Send 5 messages and do a fetch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        resp = producer.send_messages(self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait the timeout out
        time.sleep(batch_interval)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        ])

        producer.stop()
Example #58
class kafka082_emitter(object):
    def __init__(self, config=None, logger=None):
        self.log = logger
        self.config = config

        # Hush up the kafka module's logger
        import logging

        if self.log is None:
            self.log = logging.getLogger(__name__)
            self.log.setLevel(logging.DEBUG)
            self.log.addHandler(logging.StreamHandler())

        if not self.config['interval']:
            self.interval = 1
        else:
            self.interval = self.config['interval']

        if (not "codec" in self.config) or (self.config['codec'] == "none"):
            self.codec = 0x00
            self.log("debug", msg="No codec section found in config or codec value is 'none'. Falling back to NO compression for Kafka messages.")
        elif self.config['codec'] == "gzip":
            self.codec = 0x01
            self.log("debug", msg="'gzip' codec selected for Kafka messages.")
        elif self.config['codec'] == "speedy":
            self.codec = 0x02
            self.log("debug", msg="'speedy' codec selected for Kafka messages.")
        else:
            self.log("warn", msg="Unrecognized codec '{0}'. Falling back to NO compression for Kafka messages.".format(self.config['codec']))
            self.codec = 0x00

        self.brokers = self.config['brokers']
        self.topic = self.config['topic']

        # hush up the kafka logger
        logging.getLogger("kafka").setLevel(logging.INFO)

        self.client = KafkaClient(self.brokers)
        self.producer = SimpleProducer(self.client, codec=self.codec)


    def emit_stats(self, payload, global_iteration):
        # take into account custom interval, if present in config
        if global_iteration % self.interval:
            return

        try:
            self.producer.send_messages(self.topic, json.dumps(payload))
        except:
            import sys, traceback
            ei = sys.exc_info()
            traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr)
        else:
            self.log("debug", msg="Successfully sent batch to Kafka.")
Example #59
def _load_data():
    """
  Sends 50 messages (1 .. 50) to samza-test-topic.
  """
    logger.info("Running test_samza_job")
    kafka = util.get_kafka_client()
    kafka.ensure_topic_exists(TEST_INPUT_TOPIC)
    producer = SimpleProducer(kafka, async=False, req_acks=SimpleProducer.ACK_AFTER_CLUSTER_COMMIT, ack_timeout=30000)
    for i in range(1, NUM_MESSAGES + 1):
        producer.send_messages(TEST_INPUT_TOPIC, str(i))
    kafka.close()
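A possible read-back check, assuming the same util helper, TEST_INPUT_TOPIC, and NUM_MESSAGES constants as above; the consumer group name is a placeholder.

from kafka import SimpleConsumer

def _verify_data():
    """
    Reads the test messages back from samza-test-topic.
    """
    kafka = util.get_kafka_client()
    consumer = SimpleConsumer(kafka, 'samza-test-group', TEST_INPUT_TOPIC)
    msgs = consumer.get_messages(count=NUM_MESSAGES, block=True, timeout=10)
    assert len(msgs) == NUM_MESSAGES
    kafka.close()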