# --- Spark Streaming setup: Kafka direct stream -> per-batch validation ---
# Kafka connection parameters (broker list, topics) for createDirectStream are
# read from ITR2_config.ini rather than hard-coded here.

# Config for Spark: micro-batch interval, in seconds.
batch_duration = 10
conf = SparkConf().setAppName(APP_NAME)

# Create the SparkContext and the streaming context (ssc).
sc = SparkContext(conf=conf)
ssc = StreamingContext(sc, batch_duration)

logger.info("==> Creating Spark DStream ...")
topics = "RLT_ESCLT_KM_UPD_OPPTY_DTL_F"
kafka_stream = kafka2spark(ssc, topics)
# Report the stream type through the logger instead of a bare debug print,
# so it lands in the configured log destination with the rest of the output.
logger.debug("kafka_stream type: %s", type(kafka_stream))
logger.info("Created kafka_stream successfully")

# The Kafka direct stream yields (key, value) tuples; keep only the message value.
lines = kafka_stream.map(lambda x: x[1])
lines.pprint(num=50)
# foreachRDD accepts the callable directly; the `lambda rdd: valid_data(rdd)`
# wrapper added nothing.
lines.foreachRDD(valid_data)

# Start the streaming job and block until it is stopped or fails.
ssc.start()
ssc.awaitTermination()
# --- Logger construction + Spark Streaming setup (Kafka direct stream) ---
# BUG FIX: the original `print ("...%s") % APP_LOG_CONF_FILE` applied `%` to the
# return value of print() (None), which raises TypeError on Python 3; the
# formatting must happen inside the call.
print("Reading Configuration from %s" % APP_LOG_CONF_FILE)
logger = construct_logger(APP_LOG_CONF_FILE)

# Kafka parameters for createDirectStream come from ITR2_config.ini.
# (The obsolete createStream parameters zk_quorum/group were removed; they are
# not needed for the direct-stream API.)

# Config for Spark: micro-batch interval, in seconds.
batch_duration = 10
conf = SparkConf().setAppName(APP_NAME)

# Create the SparkContext and the streaming context (ssc).
sc = SparkContext(conf=conf)
ssc = StreamingContext(sc, batch_duration)

logger.info("==> Creating Spark DStream ...")
kafka_stream = kafka2spark(ssc)
logger.info("Created kafka_stream successfully")

# The Kafka direct stream yields (key, value) tuples; keep only the message value.
lines = kafka_stream.map(lambda x: x[1])
lines.pprint()

# Start the streaming job and block until it is stopped or fails.
ssc.start()
ssc.awaitTermination()