Exemplo n.º 1
0
    def kafka_stream_analysis_loop(self):
        """
        Consume the 'chatter' Kafka topic and score the sentiment of each message.

        Runs for as long as the consumer keeps yielding messages. Every
        message is echoed to stdout, its ``Body`` is analysed with TextBlob,
        and the resulting polarity is folded into the per-thread statistics
        stored in ``self.feed_item_stats`` under the message's ``FeedItemId``.
        Each entry holds ``total_polarity``, ``num_times`` and ``average``.
        """
        for record in kafka_helper.get_kafka_consumer(topic='chatter'):
            # Trace where the record came from before doing any analysis.
            trace_fields = (record.topic, record.partition, record.offset,
                            record.key, record.value)
            print("%s:%d:%d: key=%s value=%s" % trace_fields)

            payload = record.value
            blob = TextBlob(payload['Body'])

            # Statistics are keyed by thread; a poster or parentId breakdown
            # could be added later in the same way.
            thread_id = payload['FeedItemId']
            stats = self.feed_item_stats.setdefault(
                thread_id, {'total_polarity': 0, 'num_times': 0})

            stats['total_polarity'] += blob.sentiment.polarity
            stats['num_times'] += 1
            # num_times is at least 1 here, so the division is always defined.
            stats['average'] = stats['total_polarity'] / stats['num_times']
    def __init__(self):
        """
        Set up the Kafka consumer and, when configured, a database connection.

        The consumer is first requested from ``kafka_helper`` for ``TOPIC``.
        If that raises, a plain ``KafkaConsumer`` on the ``'test'`` topic at
        ``localhost:9092`` with JSON-decoded values is used instead.

        When ``DATABASE_URL`` is present in the environment, a psycopg2
        connection (``sslmode='require'``) and a cursor are opened.
        ``self.use_db`` is True only when that connection succeeded; otherwise
        ``self.conn`` and ``self.cur`` are ``None`` and ``self.use_db`` is
        False, so callers can rely on the flag before touching the cursor.
        """
        logging.info('Started consuming\n')
        self.use_db = False
        # Always define the DB attributes so they exist even without a database.
        self.conn = None
        self.cur = None

        try:  # heroku
            self.consumer = kafka_helper.get_kafka_consumer(topic=TOPIC)
            logging.info('RUNNING CONSUMER ON HEROKU\n')

        except Exception:  # locally on docker
            self.consumer = KafkaConsumer(
                'test',
                bootstrap_servers='localhost:9092',
                value_deserializer=lambda m: json.loads(m.decode('utf-8')))
            logging.info(
                'RUNNING CONSUMER LOCAL - so not using kafka_helper\n')

        if 'DATABASE_URL' in os.environ:
            # Imported lazily so psycopg2 is only required when a DB is configured.
            import psycopg2
            database_url = os.environ['DATABASE_URL']
            try:
                self.conn = psycopg2.connect(database_url, sslmode='require')
                self.cur = self.conn.cursor()
            except Exception:
                # Best effort: keep running without a database, but record why
                # the connection failed (logging.exception adds the traceback).
                self.conn = None
                self.cur = None
                logging.exception("I am unable to connect to the database")
            else:
                # Only advertise DB support once the connection really exists.
                self.use_db = True
                logging.info("Connected to the database")
Exemplo n.º 3
0
from flask import Flask
# from flask import render_template
import os
import kafka_helper
import json
import asyncio
import websockets

# app = Flask(__name__)

# @app.route("/")
# def index():
#     return render_template("index.html", port=os.environ["WS_PORT"])

# The topic name is the KAFKA_PREFIX environment variable followed by "temp".
# A missing KAFKA_PREFIX raises KeyError at import time.
kafka_prefix = os.environ["KAFKA_PREFIX"]
topic = "{}temp".format(kafka_prefix)

# Module-level consumer shared by the websocket handler below.
consumer = kafka_helper.get_kafka_consumer(topic=topic)
print("Connected")


async def echo(websocket, path):
    """Forward the value of every Kafka message to the websocket as JSON.

    Reads from the module-level ``consumer``; ``path`` is accepted but unused.
    """
    # NOTE(review): this is a plain (synchronous) for loop inside a coroutine.
    # If iterating the consumer blocks while waiting for messages, the event
    # loop is stalled for that time -- confirm this is acceptable.
    for record in consumer:
        print(record)
        payload = json.dumps(record.value)
        await websocket.send(payload)


# Start the websocket server on localhost:8765 with `echo` as the handler,
# then keep the event loop running so connections continue to be served.
event_loop = asyncio.get_event_loop()
start_server = websockets.serve(echo, 'localhost', '8765')
event_loop.run_until_complete(start_server)
event_loop.run_forever()
Exemplo n.º 4
0
def testKafkaHelperRCV():
    """Log, at debug level, every message received on the 'ple2' topic."""
    # Imported locally so kafka_helper is only needed when this check runs.
    import kafka_helper

    for received in kafka_helper.get_kafka_consumer(topic='ple2'):
        logger.debug(received)