sc = SparkContext(conf=conf) sqlContext = SQLContext(sc) # 查询从当前时间开始前10s的数据 now_datetime = datetime.now() end_time = int(time.mktime(now_datetime.timetuple())) start_time = int(time.mktime((now_datetime - timedelta(seconds=5)).timetuple())) print start_time print end_time his_data = RedisCache().zrange_by_score("his_data_zadd", start_time, end_time) #print his_data['result'] if his_data['result']: hisRDD = sc.parallelize(his_data['result']) his = sqlContext.jsonRDD(hisRDD) print his.count() row = his.sort(his.value.desc()).first() print "|Time: " + row[0], "|Name: " + row[1], "|Oid: " + row[2], "|Value: " + str(row[3]) + "|" his.registerTempTable("his_data_zadd") #sqlContext.cacheTable("his_data_zadd") #assets = sqlContext.sql("SELECT his.name, his.oid FROM his_data_zadd as his WHERE his.value > 200 AND his.oid < 3000000") #sql_str = "SELECT his.name, his.oid FROM (SELECT MAX(temp_t.value), temp_t.name, temp_t.oid FROM his_data_zadd AS temp_t) his" #sql_str = "SELECT his.name, his.oid, his.value FROM his_data_zadd AS his ORDER BY his.value DESC LIMIT 10" #sql_str = 'SELECT his.name, his.oid FROM his_data_zadd AS his WHERE EXISTS (SELECT MAX(temp_t.value) FROM his_data_zadd AS temp_t)' """ Spark 1.5 does not support subquery.
# 查询从当前时间开始前10s的数据 now_datetime = datetime.now() end_time = int(time.mktime(now_datetime.timetuple())) start_time = int( time.mktime((now_datetime - timedelta(seconds=5)).timetuple())) print start_time print end_time his_data = RedisCache().zrange_by_score("his_data_zadd", start_time, end_time) #print his_data['result'] if his_data['result']: hisRDD = sc.parallelize(his_data['result']) his = sqlContext.jsonRDD(hisRDD) print his.count() row = his.sort(his.value.desc()).first() print "|Time: " + row[0], "|Name: " + row[1], "|Oid: " + row[ 2], "|Value: " + str(row[3]) + "|" his.registerTempTable("his_data_zadd") #sqlContext.cacheTable("his_data_zadd") #assets = sqlContext.sql("SELECT his.name, his.oid FROM his_data_zadd as his WHERE his.value > 200 AND his.oid < 3000000") #sql_str = "SELECT his.name, his.oid FROM (SELECT MAX(temp_t.value), temp_t.name, temp_t.oid FROM his_data_zadd AS temp_t) his" #sql_str = "SELECT his.name, his.oid, his.value FROM his_data_zadd AS his ORDER BY his.value DESC LIMIT 10" #sql_str = 'SELECT his.name, his.oid FROM his_data_zadd AS his WHERE EXISTS (SELECT MAX(temp_t.value) FROM his_data_zadd AS temp_t)' """ Spark 1.5 does not support subquery.
from pyspark import SparkContext, SparkConf
from pyspark import SQLContext
import itertools


def print_fun(collect):
    """Print each record of an iterable as a "|Name ... |Value ... |Attribute ..." line.

    Used by the driver code below as the callback passed to
    ``foreachPartition``.

    Args:
        collect: iterable of indexable records laid out as
            ``(name, value, attribute)``. ``name`` and ``attribute`` are
            concatenated to string prefixes, so they must be strings;
            ``value`` is converted with ``str``.
    """
    for item in collect:
        # Join the three fields with single spaces and print one string in
        # parentheses: this emits the same text as the Python 2 statement
        # ``print a, b, c`` while also being valid on Python 3.
        print(" ".join((
            "|Name: " + item[0],
            "|Value: " + str(item[1]),
            "|Attribute: " + item[2],
        )))


if __name__ == "__main__":
    conf = SparkConf().setAppName("json_ops").setMaster("local[3]")
    sc = SparkContext(conf=conf)
    try:
        sqlContext = SQLContext(sc)

        # Parallelize the JSON-formatted strings directly into an RDD.
        equipmentRDD = sc.parallelize([
            '{"name":"asset1","value":1,"attribute":"属性1"}',
            '{"name":"asset2","value":2,"attribute":"属性2"}',
            '{"name":"asset3","value":3,"attribute":"属性3"}',
        ])
        equipment = sqlContext.jsonRDD(equipmentRDD)
        equipment.registerTempTable("equipment")

        assets = sqlContext.sql(
            "SELECT * FROM equipment as eq "
            "WHERE eq.value >= 1 AND eq.value <= 2"
        )
        assets.show()

        # Map each result row to a (name, value, attribute) tuple and print
        # the tuples partition by partition. The result of foreachPartition
        # is not kept: the call is made only for its printing side effect.
        assets.map(
            lambda asset: (asset.name, asset.value, asset.attribute)
        ).foreachPartition(print_fun)
    finally:
        # Stop the context even when one of the steps above raises, so a
        # failed run does not leave the SparkContext running.
        sc.stop()