Пример #1
0
class Sparksteam(Myredis, Myhbase):
    """Spark Streaming job that consumes one Kafka topic and stores its records.

    Records arrive through ``KafkaUtils.createStream`` and are handed to the
    ``Myhbase`` wrapper; for the crawler topic, category links are additionally
    registered through the ``Myredis`` wrapper.
    """

    def __init__(self, zkQuorum, topic, hbtable):
        """Remember the Kafka settings and build the storage clients.

        Args:
            zkQuorum: ZooKeeper quorum passed to ``KafkaUtils.createStream``.
            topic: Kafka topic to consume; it also selects the batch handler.
            hbtable: Argument forwarded to ``Myhbase`` (presumably the HBase
                table name -- confirm against ``Myhbase``).
        """
        self.zkQuorum = zkQuorum
        self.topic = topic
        self.hbase = Myhbase(hbtable)
        self.redis = Myredis()

    def start(self):
        """Build the streaming pipeline, start it and block until it ends."""
        sc = SparkContext(appName="PythonStreamingNOTHS")
        ssc = StreamingContext(sc, 10)

        kvs = KafkaUtils.createStream(ssc, self.zkQuorum,
                                      "spark-streaming-consumer",
                                      {self.topic: 1})
        # pprint() registers an output operation and returns None, so its
        # result is not something worth passing to print().
        kvs.pprint()

        # Dispatch on the instance's own topic rather than on a module-level
        # name that may not exist. Unknown topics get no storage handler.
        handlers = {
            'NOTHS-crawler-topic': self.save_crawler_hbase,
            'NOTHS-trends-topic': self.save_trends_hbase,
        }
        handler = handlers.get(self.topic)
        if handler is not None:
            kvs.foreachRDD(handler)

        ssc.start()
        ssc.awaitTermination()

    def save_trends_hbase(self, time, rdd):
        """Store every record of one batch through ``self.hbase.save_trend``.

        Args:
            time: Batch time supplied by ``foreachRDD`` (unused).
            rdd: RDD holding the records of the current batch.
        """
        try:
            for rec in rdd.collect():
                self.hbase.save_trend(rec)
        except Exception as err:
            # Best effort: report the failure and keep the stream running,
            # without swallowing KeyboardInterrupt/SystemExit.
            print('HBase update Err.', err)

    def save_crawler_hbase(self, time, rdd):
        """Store crawler records and register newly seen categories.

        Each record is saved through ``self.hbase.save_crawler``. Its second
        element is then split on commas; when the first field is
        ``'category_link'`` the third field is added to Redis if
        ``isNewCategory`` reports it as new.

        Args:
            time: Batch time supplied by ``foreachRDD`` (unused).
            rdd: RDD holding the records of the current batch.
        """
        try:
            for rec in rdd.collect():
                self.hbase.save_crawler(rec)

                fields = rec[1].split(',')
                if str(fields[0]) != 'category_link':
                    continue

                category = str(fields[2])
                if self.redis.isNewCategory(category):
                    self.redis.addcategory(category)
        except Exception as err:
            # Best effort: report the failure and keep the stream running,
            # without swallowing KeyboardInterrupt/SystemExit.
            print('HBase update Err.', err)
Пример #2
0
class Sparksteam(Myredis, Myhbase):
    """Spark Streaming job that consumes one Kafka topic and stores its records.

    Records arrive through ``KafkaUtils.createStream`` and are handed to the
    ``Myhbase`` wrapper; for the crawler topic, category links are additionally
    registered through the ``Myredis`` wrapper.
    """

    def __init__(self, zkQuorum, topic, hbtable):
        """Remember the Kafka settings and build the storage clients.

        Args:
            zkQuorum: ZooKeeper quorum passed to ``KafkaUtils.createStream``.
            topic: Kafka topic to consume; it also selects the batch handler.
            hbtable: Argument forwarded to ``Myhbase`` (presumably the HBase
                table name -- confirm against ``Myhbase``).
        """
        self.zkQuorum = zkQuorum
        self.topic = topic
        self.hbase = Myhbase(hbtable)
        self.redis = Myredis()

    def start(self):
        """Build the streaming pipeline, start it and block until it ends."""
        sc = SparkContext(appName="PythonStreamingNOTHS")
        ssc = StreamingContext(sc, 10)

        kvs = KafkaUtils.createStream(ssc, self.zkQuorum, "spark-streaming-consumer", {self.topic: 1})
        # pprint() registers an output operation and returns None, so its
        # result is not something worth passing to print().
        kvs.pprint()

        # Dispatch on the instance's own topic rather than on a module-level
        # name that may not exist. Unknown topics get no storage handler.
        handlers = {
            'NOTHS-crawler-topic': self.save_crawler_hbase,
            'NOTHS-trends-topic': self.save_trends_hbase,
        }
        handler = handlers.get(self.topic)
        if handler is not None:
            kvs.foreachRDD(handler)

        ssc.start()
        ssc.awaitTermination()

    def save_trends_hbase(self, time, rdd):
        """Store every record of one batch through ``self.hbase.save_trend``.

        Args:
            time: Batch time supplied by ``foreachRDD`` (unused).
            rdd: RDD holding the records of the current batch.
        """
        try:
            for rec in rdd.collect():
                self.hbase.save_trend(rec)
        except Exception as err:
            # Best effort: report the failure and keep the stream running,
            # without swallowing KeyboardInterrupt/SystemExit.
            print('HBase update Err.', err)

    def save_crawler_hbase(self, time, rdd):
        """Store crawler records and register newly seen categories.

        Each record is saved through ``self.hbase.save_crawler``. Its second
        element is then split on commas; when the first field is
        ``'category_link'`` the third field is added to Redis if
        ``isNewCategory`` reports it as new.

        Args:
            time: Batch time supplied by ``foreachRDD`` (unused).
            rdd: RDD holding the records of the current batch.
        """
        try:
            for rec in rdd.collect():
                self.hbase.save_crawler(rec)

                fields = rec[1].split(',')
                if str(fields[0]) != 'category_link':
                    continue

                category = str(fields[2])
                if self.redis.isNewCategory(category):
                    self.redis.addcategory(category)
        except Exception as err:
            # Best effort: report the failure and keep the stream running,
            # without swallowing KeyboardInterrupt/SystemExit.
            print('HBase update Err.', err)
Пример #3
0
class GoogleTrend(Myredis, Gtrend, MyKafka):
    """Polls Redis for categories and publishes their trend reports to Kafka."""

    def __init__(self):
        """Create the Redis, trend-report and Kafka helpers used by ``start``."""
        self.redis = Myredis()
        self.gtrend = Gtrend()
        self.kafka = MyKafka('NOTHS-trends-topic')

    def start(self):
        """Loop forever: wait, fetch the next category, report it, send rows.

        Each iteration sleeps a random 60-90 seconds first. When Redis yields
        no category the iteration ends without sending anything.
        """
        while True:
            time.sleep(randint(60, 90))
            print('starting google trend ...')

            category = self.redis.getNextCategory()
            if not category:
                continue

            print('reporting on :', category)
            report = self.gtrend.get_report(category)
            rows = self.pack_gdata(report, category.decode("utf-8"))

            # Every packed row goes out as one comma-separated message.
            for fields in rows:
                self.kafka.send(",".join(fields))
            print('end')
Пример #4
0
class GoogleTrend(Myredis, Gtrend, MyKafka):
    """Polls Redis for categories and publishes their trend reports to Kafka."""

    def __init__(self):
        """Create the Redis, trend-report and Kafka helpers used by ``start``."""
        self.redis = Myredis()
        self.gtrend = Gtrend()
        self.kafka = MyKafka("NOTHS-trends-topic")

    def start(self):
        """Loop forever: wait, fetch the next category, report it, send rows.

        Each iteration sleeps a random 60-90 seconds first. When Redis yields
        no category the iteration ends without sending anything.
        """
        while True:
            time.sleep(randint(60, 90))
            print("starting google trend ...")

            category = self.redis.getNextCategory()
            if not category:
                continue

            print("reporting on :", category)
            report = self.gtrend.get_report(category)
            rows = self.pack_gdata(report, category.decode("utf-8"))

            # Every packed row goes out as one comma-separated message.
            for fields in rows:
                self.kafka.send(",".join(fields))
            print("end")
Пример #5
0
 def __init__(self, zkQuorum, topic, hbtable):
     """Store the Kafka settings and create the HBase and Redis helpers.

     Args:
         zkQuorum: Stored unchanged on ``self.zkQuorum``.
         topic: Stored unchanged on ``self.topic``.
         hbtable: Forwarded to the ``Myhbase`` constructor.
     """
     # Plain configuration values are kept exactly as received.
     self.zkQuorum, self.topic = zkQuorum, topic

     # The helpers are built in this order: HBase first, then Redis.
     hbase_client = Myhbase(hbtable)
     self.hbase = hbase_client
     redis_client = Myredis()
     self.redis = redis_client
Пример #6
0
 def __init__(self, zkQuorum, topic, hbtable):
     """Store the Kafka settings and create the HBase and Redis helpers.

     Args:
         zkQuorum: Stored unchanged on ``self.zkQuorum``.
         topic: Stored unchanged on ``self.topic``.
         hbtable: Forwarded to the ``Myhbase`` constructor.
     """
     # Plain configuration values are kept exactly as received.
     self.zkQuorum, self.topic = zkQuorum, topic

     # The helpers are built in this order: HBase first, then Redis.
     hbase_client = Myhbase(hbtable)
     self.hbase = hbase_client
     redis_client = Myredis()
     self.redis = redis_client
Пример #7
0
 def __init__(self):
     """Create the Redis, trend-report and Kafka helpers for this instance."""
     # Topic name handed to the MyKafka helper.
     trends_topic = 'NOTHS-trends-topic'

     redis_client = Myredis()
     self.redis = redis_client
     trend_client = Gtrend()
     self.gtrend = trend_client
     self.kafka = MyKafka(trends_topic)
Пример #8
0
 def __init__(self):
     """Create the Redis, trend-report and Kafka helpers for this instance."""
     # Topic name handed to the MyKafka helper.
     trends_topic = "NOTHS-trends-topic"

     redis_client = Myredis()
     self.redis = redis_client
     trend_client = Gtrend()
     self.gtrend = trend_client
     self.kafka = MyKafka(trends_topic)