class Sparksteam(Myredis, Myhbase):
    """Spark Streaming job that copies records from a Kafka topic into HBase.

    Records read from the crawler topic are additionally inspected for
    ``category_link`` entries; the category of such an entry is added to
    Redis when Redis reports it as new.
    """

    def __init__(self, zkQuorum, topic, hbtable):
        """Store the Kafka settings and create the HBase and Redis clients.

        Args:
            zkQuorum: Forwarded to ``KafkaUtils.createStream`` in ``start``.
            topic: Kafka topic to consume; also selects which save callback
                ``start`` registers.
            hbtable: Forwarded to ``Myhbase`` (presumably the HBase table
                name -- confirm against ``Myhbase``).
        """
        self.zkQuorum = zkQuorum
        self.topic = topic
        self.hbase = Myhbase(hbtable)
        self.redis = Myredis()

    def start(self):
        """Build the streaming pipeline, start it and block until it ends."""
        sc = SparkContext(appName="PythonStreamingNOTHS")
        ssc = StreamingContext(sc, 10)
        kvs = KafkaUtils.createStream(
            ssc, self.zkQuorum, "spark-streaming-consumer", {self.topic: 1}
        )
        # pprint() registers an output operation and returns None, so it is
        # called on its own instead of being passed to print(), which only
        # ever printed "None" once while the pipeline was being set up.
        kvs.pprint()
        # The topic lives on the instance; a bare ``topic`` name is not
        # defined inside this method.
        if self.topic == 'NOTHS-crawler-topic':
            kvs.foreachRDD(self.save_crawler_hbase)
        elif self.topic == 'NOTHS-trends-topic':
            kvs.foreachRDD(self.save_trends_hbase)
        ssc.start()
        ssc.awaitTermination()

    def save_trends_hbase(self, time, rdd):
        """Save every record of one batch with ``self.hbase.save_trend``.

        Args:
            time: Batch time passed in by ``foreachRDD``; not used.
            rdd: The batch; it is collected before the records are saved.
        """
        try:
            for rec in rdd.collect():
                self.hbase.save_trend(rec)
        except Exception as exc:
            # Best effort: report the failure and give up on the rest of
            # this batch without letting the exception escape the callback.
            print('HBase update Err.', exc)

    def save_crawler_hbase(self, time, rdd):
        """Save one batch of crawler records and register new categories.

        Each record is saved with ``self.hbase.save_crawler``. The second
        element of the record is then split on commas; when the first field
        is ``category_link`` the third field is treated as a category and
        added to Redis if ``isNewCategory`` says it is new.

        Args:
            time: Batch time passed in by ``foreachRDD``; not used.
            rdd: The batch; it is collected before the records are saved.
        """
        try:
            for rec in rdd.collect():
                self.hbase.save_crawler(rec)
                fields = rec[1].split(',')
                if str(fields[0]) != 'category_link':
                    continue
                category = str(fields[2])
                if self.redis.isNewCategory(category):
                    self.redis.addcategory(category)
        except Exception as exc:
            # Covers HBase, Redis and malformed-record errors alike; the
            # remainder of the batch is skipped, as before.
            print('HBase update Err.', exc)
class GoogleTrend(Myredis, Gtrend, MyKafka):
    """Fetch a Google Trends report per category and publish it to Kafka.

    Categories come from Redis one at a time; every row of the packed
    report is sent to Kafka as one comma-joined string.
    """

    def __init__(self):
        """Create the Redis, Google Trends and Kafka helpers used by ``start``."""
        self.redis = Myredis()
        self.gtrend = Gtrend()
        self.kafka = MyKafka('NOTHS-trends-topic')

    def start(self):
        """Loop forever, reporting on one category per iteration."""
        while True:
            # Random pause of 60-90 seconds before every poll.
            pause = randint(60, 90)
            time.sleep(pause)
            print('starting google trend ...')
            category = self.redis.getNextCategory()
            if category:
                print('reporting on :', category)
                report = self.gtrend.get_report(category)
                # get_report receives the raw value, pack_gdata the decoded one.
                rows = self.pack_gdata(report, category.decode("utf-8"))
                for row in rows:
                    self.kafka.send(",".join(row))
            # NOTE(review): the source's indentation was lost, so the nesting
            # level of this print is an assumption -- confirm against the
            # original file.
            print('end')
class GoogleTrend(Myredis, Gtrend, MyKafka):
    """Publish Google Trends reports for categories taken from Redis.

    Each iteration asks Redis for the next category, requests its report,
    packs it, and sends every packed row to Kafka as a comma-joined string.
    """

    def __init__(self):
        """Set up the Redis, Google Trends and Kafka helper objects."""
        self.redis = Myredis()
        self.gtrend = Gtrend()
        self.kafka = MyKafka("NOTHS-trends-topic")

    def start(self):
        """Run the polling loop; this method never returns normally."""
        while True:
            # Wait a random 60-90 seconds before each poll.
            delay = randint(60, 90)
            time.sleep(delay)
            print("starting google trend ...")
            category = self.redis.getNextCategory()
            if category:
                print("reporting on :", category)
                trend_data = self.gtrend.get_report(category)
                # The report is requested with the raw value, but packed
                # with the UTF-8 decoded category.
                packed = self.pack_gdata(trend_data, category.decode("utf-8"))
                for fields in packed:
                    self.kafka.send(",".join(fields))
            # NOTE(review): indentation was lost in the source, so placing
            # this print at loop level is an assumption -- confirm.
            print("end")
def __init__(self, zkQuorum, topic, hbtable):
    """Remember the Kafka settings and create the HBase and Redis helpers.

    Args:
        zkQuorum: Stored unchanged as ``self.zkQuorum``.
        topic: Stored unchanged as ``self.topic``.
        hbtable: Passed to ``Myhbase`` (presumably the table name --
            confirm against ``Myhbase``).
    """
    # Plain configuration values first ...
    self.zkQuorum, self.topic = zkQuorum, topic
    # ... then the helper objects, HBase before Redis.
    hbase_client = Myhbase(hbtable)
    self.hbase = hbase_client
    redis_client = Myredis()
    self.redis = redis_client
def __init__(self):
    """Create the Redis, Google Trends and Kafka helpers held by the instance."""
    # Helpers are constructed in the order Redis, Gtrend, Kafka.
    redis_client = Myredis()
    self.redis = redis_client
    trend_client = Gtrend()
    self.gtrend = trend_client
    # The Kafka helper is created with the trends topic name.
    self.kafka = MyKafka('NOTHS-trends-topic')
def __init__(self):
    """Set up the three helper objects: Redis, Google Trends and Kafka."""
    # Construction order is Redis, then Gtrend, then Kafka.
    redis_helper = Myredis()
    self.redis = redis_helper
    gtrend_helper = Gtrend()
    self.gtrend = gtrend_helper
    # The Kafka helper receives the trends topic name.
    self.kafka = MyKafka("NOTHS-trends-topic")