Exemplo n.º 1
0
def set(gkey,data):
	"""Upload values into the history table of a group.

	The item ids are looked up in the Redis item hash of the group
	(RD_ITM_KEY_PRFX + gkey); every itemkey that resolves to an id is
	written to the history table selected by the group's 'mrk' field.

	Parameters:
	gkey  gkey of the group
	data  data set, preferably no more than 100 entries per call:
	      {'itemkey': [value, timestamp]}
	      The timestamp may be omitted or falsy; the current time is
	      used in that case.

	Returns:
	[-2, msg]        the group has no 'mrk' field cached in Redis
	[-1, msg]        none of the itemkeys is known, or the insert failed
	(res, desc)      result of the insert query otherwise
	"""
	stat_db = mysqldb.get_db()
	rdb = rediswrap.get_redis()
	kys = list(data.keys())
	# 'mrk' (the table markers) is requested last so it can be popped off
	# the end of the reply, leaving ids aligned with the itemkeys.
	kys.append('mrk')
	itm = rdb.hmget(RD_ITM_KEY_PRFX+gkey,kys)
	mrk = itm.pop() if itm else None
	# hmget yields None for a missing field; without the marker the target
	# table is unknown, so treat it as "group not cached".
	if not mrk:
		return [-2,'items not in redis.']
	hst_tb = get_hst_name(mrk.split(',')[1])
	now = int(time.time())
	value = []
	for itemkey,itemid in zip(kys,itm):
		if not itemid:
			continue
		tmp = data[itemkey]
		# The timestamp is optional: fall back to "now" when it is
		# missing or falsy.
		clock = tmp[1] if len(tmp) > 1 and tmp[1] else now
		# NOTE(review): values are interpolated into the SQL text; callers
		# must not pass untrusted data until stat_db offers bound parameters.
		value.append("('%s','%s','%s')" %(itemid,tmp[0],clock))
	if not value:
		return [-1,"no items find."]
	res,desc = stat_db.query("insert into %s(itemid,val,clock)values%s" % (hst_tb,','.join(value)),1)
	if res == -1:
		return [res,str(desc)]
	return res,desc
Exemplo n.º 2
0
def get(gkey,itm=None,start_time=0,stop_time=0,sort='clock asc',groupby=0,page=None):
	"""Fetch statistics rows of a group from its history table.

	Parameters:
	   gkey       gkey of the group
	   itm        list of itemkeys; when empty the records of the whole
	              group are fetched. The list is not modified.
	   start_time start time; defaults to utils.timestamp(0,'d')
	   stop_time  end time (exclusive); defaults to the current time
	   sort       ORDER BY expression
	   groupby    grouping mode: 0 none, 1 by itemid, 2 by clock,
	              anything else by itemid and clock (values are summed)
	   page       paging parameters {'site': rows per page, 'num': page
	              number}; by default all records are returned

	Returns:
	   [-2, msg]                  the group has no 'mrk' field in Redis
	   (0, [item_names, rows])    item_names maps item id -> name
	   (0, [{}, []])              nothing found or the query failed
	"""
	stat_db = mysqldb.get_db()
	rdb = rediswrap.get_redis()
	sql_item = {'fields':'*'}
	r_itmkey = RD_ITM_KEY_PRFX+gkey
	if itm:
		# Build a new list instead of appending to the caller's argument.
		itmids = rdb.hmget(r_itmkey,list(itm)+['mrk'])
		mrk = itmids.pop()
	else:
		mrk = rdb.hget(r_itmkey,'mrk')
		itmids = rdb.hvals(r_itmkey)
		# The hash values include the marker itself; drop it from the ids.
		if mrk in itmids:
			itmids.remove(mrk)
	if not mrk:
		return [-2,'items not in redis.']
	mrk = mrk.split(',')

	ids = [str(k) for k in itmids if k]
	# "itemid in ()" is not valid SQL; with no ids there is nothing to fetch.
	if not ids:
		return 0,[{},[]]
	id_list = ",".join(ids)
	sql_item['table'] = get_hst_name(mrk[1])
	sql_item['where'] = " itemid in (%s) " % id_list
	start_time = utils.timestamp(start_time) if start_time else utils.timestamp(0,'d')
	stop_time = utils.timestamp(stop_time) if stop_time else int(time.time())
	sql_item['where'] += " and clock>=%s and clock <%s" % (start_time,stop_time)
	sql_item['order'] = sort

	if groupby:
		if groupby == 1:
			sql_item['group'] = 'itemid'
		elif groupby == 2:
			sql_item['group'] = 'clock'
		else:
			sql_item['group'] = 'itemid,clock'
		sql_item['fields'] = "itemid,sum(val) as val,clock"
	# Paging is provisional (marked as "to be decided" by the author).
	if page:
		s = page['num']*page['site']
		sql_item['limit'] = "%s,%s" %(s,page['site'])

	res,desc = stat_db.query(sql_item)
	if res != 0 or not desc:
		return 0,[{},[]]
	# Resolve the display names of the items.
	item_lab = {}
	itm_tb = ("stat_item_" + mrk[0]) if mrk[0] else "stat_item"
	rs, ds = stat_db.query("select name,id from %s where id in(%s)" %(itm_tb,id_list))
	if rs == 0 and ds:
		for row in ds:
			item_lab[row['id']] = row['name']
	return 0,[item_lab,desc]
Exemplo n.º 3
0
def gettags(kw):
	"""Return a (cached) slice of the tag tree of a business configuration.

	The full tree is read from the Redis key "biz:targ.<table>" and rebuilt
	through tagsclass.prod_targs() when that key is absent. The processed
	result is cached under "<biznum>.<tag_id><lv2rows><lv3rows>".

	test:
	     curl -l -H "Content-Type: application/json" -X POST -d  '{"biznum":"tags.corp","tag_id":101,"lv2rows":13,"lv3rows":1}'     http://192.168.10.126:6000/biz_tags/gettags

	Parameters (keys of kw):
		biznum:  business configuration identifier (required)
		tag_id:  parent tag; a 3-digit id selects a level-1 tag, a 6-digit
		         id a level-2 tag, an empty value the whole tree
		lv2rows: number of level-2 tags to extract
		lv3rows: number of level-3 tags to extract

	Returns:
		[-6, msg] when biznum is missing, an sconf.err_handle value when the
		configuration or the tag source is unavailable, otherwise
		(0, result) where result is {} when nothing matches.
	"""
	biznum = kw.get('biznum','')

	sort = ""
	tag_id = str(kw.get('tag_id',""))
	tag_id_len = 0 if len(tag_id) < 3 else len(tag_id)
	lv2rows = kw.get('lv2rows',0)
	lv3rows = kw.get('lv3rows',0)

	if not biznum:
		return [-6,'parameter biznum not set.']
	# Load the business configuration.
	bizcnf = get_cnf_val(biznum,sconf.BIZ)
	if not bizcnf:
		return sconf.err_handle.biznum_not_config

	ttl = bizcnf.get('ttl',600)
	# Serve the processed result from the cache when possible.
	rdb = rediswrap.get_redis('cache')
	ckey = "%s.%s%s%s%s" %(biznum,sort,tag_id,lv2rows,lv3rows)
	res = rdb.get(ckey)
	if res:
		return 0,json.loads(res)

	res = rdb.get("biz:targ.%s"%bizcnf['table'])
	if res:
		res = json.loads(res)
	else:
		ret = tagsclass.prod_targs(biznum)
		# prod_targs may yield nothing or an error value instead of (0, tree).
		if not ret:
			return 0,{}
		s,res = ret
		if s != 0:
			return ret
		# Round-trip through JSON so the keys are strings, exactly as they
		# are when the tree is loaded from the cache; tag_id is a string.
		res = json.loads(json.dumps(res))
	# Select the requested part of the tree.
	data = {}
	if tag_id_len == 3:
		# level-1 tag
		if tag_id in res and 'extn' in res[tag_id]:
			data = res[tag_id]
	elif tag_id_len == 6:
		# level-2 tag; unknown ids give an empty result instead of KeyError
		data = res.get(tag_id[:3],{}).get('extn',{}).get(tag_id,{})
	elif not tag_id:
		# whole tree
		data = {'info':{'tag_id':0},'extn':res}
	if data and 'extn' in data:
		res = tag_recursion(data,lv2rows,lv3rows)
		# NOTE(review): (key, value, ttl) is the argument order of the legacy
		# redis-py Redis.setex; redis-py >= 3.0 expects (name, time, value).
		# Confirm which signature the rediswrap client exposes.
		rdb.setex(ckey,json.dumps(res),ttl)
		return 0,res
	return 0,{}
Exemplo n.º 4
0
def prod_targs(key):
	"""Build the three-level tag tree of a business configuration and cache it.

	All rows of the configured table are read ordered by tag_id. Tags whose
	id has 3 digits form level 1, 6 digits level 2 (children of the tag
	named by the first 3 digits) and 9 digits level 3 (children of the tag
	named by the first 6 digits). Children are stored under the 'extn' key
	of their parent; every node keeps its row under 'info'.

	Parameters:
		key: business configuration identifier

	Returns:
		an sconf.err_handle value when the configuration, the database
		configuration or the database itself is unavailable, otherwise
		(0, tree); the tree is {} when the table has no rows.
	"""
	# Load the business configuration.
	bizcnf = get_cnf_val(key,sconf.BIZ)
	if not bizcnf:
		return sconf.err_handle.biznum_not_config
	# Load the database configuration.
	dbinfo = get_host_by_data(bizcnf['source'])
	if not dbinfo:
		return sconf.err_handle.db_not_config
	dbinfo['dbname']=bizcnf['source'].split('.')[-1]
	rdb = rediswrap.get_redis('cache')
	rkey = "biz:targ.%s"%bizcnf['table']
	db = dbclass(dbinfo)
	res,desc = db.connect()
	if res == -1:
		return sconf.err_handle.db_err

	sql_item = {
		'table': bizcnf['table'],
		'fields': bizcnf['fields'],
		# Ascending order lets parents be seen before their children.
		'order': "tag_id asc",
	}
	res,desc = db.query(sql_item)
	if res == -1:
		return sconf.err_handle.db_err

	obj = {}
	for row in desc or []:
		tid = str(row['tag_id'])
		if len(tid) == 3:
			obj[row['tag_id']] = {"info":row}
			continue
		if len(tid) == 6:
			parent = obj.get(int(tid[:3]))
		elif len(tid) == 9:
			parent = obj.get(int(tid[:3]),{}).get('extn',{}).get(int(tid[:6]))
		else:
			continue
		# A row whose parent is not in the table is skipped rather than
		# aborting the whole build with a KeyError.
		if parent is None:
			continue
		parent.setdefault('extn',{})[row['tag_id']] = {"info":row}
	if desc:
		rdb.set(rkey,json.dumps(obj))
	return 0,obj
Exemplo n.º 5
0
def reg_items2redis(gkey,itm=None):
	"""Register the items of a group in the Redis item hash.

	The hash RD_ITM_KEY_PRFX + gkey maps itemkey -> item id and carries a
	'mrk' field "<items_mrk>,<history_mrk>" (only set when not yet present).

	Parameters:
		gkey: gkey of the group
		itm:  optional list of itemkeys; when given only these are loaded,
		      otherwise every item of the group is loaded.

	Rows are read and written in batches of 50. Nothing is returned.
	"""
	rdb = rediswrap.get_redis()
	stat_db = mysqldb.get_db()
	# NOTE(review): the result is not used; the call is kept in case
	# get_hash has side effects.
	rediswrap. get_hash(gkey, system='default',serialized_type='string')
	res,desc = get_groups([gkey])
	if res != 0 or not desc:
		return
	group = desc[0]
	itm_tb = get_itm_name(group['items_mrk'])
	rkey = RD_ITM_KEY_PRFX+gkey
	rdb.hsetnx(rkey,'mrk',"%s,%s" %(group['items_mrk'],group['history_mrk']))
	gid = group['gid']
	batch = 50

	# An itemkey list was given: look the ids up by itemkey.
	if itm:
		for start in range(0,len(itm),batch):
			# NOTE(review): itemkeys are interpolated into the SQL text;
			# they must come from a trusted source.
			quoted = ['"%s"' % k for k in itm[start:start+batch]]
			sql = "select id,itemkey from %s where itemkey in (%s)" % (itm_tb, ",".join(quoted))
			res,rows = stat_db.query(sql)
			if res == 0 and rows:
				rdb.hmset(rkey,{row['itemkey']:row['id'] for row in rows})
		return

	# No itemkey list: page through all items of the group by id.
	res,rows = stat_db.query("select min(id) as mnid,max(id) as mxid from %s where gid = %s" % (itm_tb,gid))
	if res != 0 or not rows or rows[0]['mnid'] is None:
		return
	mxid = rows[0]['mxid']
	# Start just below the smallest id so that row itself is included.
	last_id = rows[0]['mnid'] - 1
	while last_id < mxid:
		# "order by id" makes the last row of a page the correct cursor.
		sql = "select id,itemkey from %s where id > %s and gid=%s order by id limit %s" % (itm_tb,last_id,gid,batch)
		res,rows = stat_db.query(sql)
		# Stop on an error or an empty page instead of spinning forever.
		if res != 0 or not rows:
			break
		rdb.hmset(rkey,{row['itemkey']:row['id'] for row in rows})
		last_id = rows[-1]['id']
Exemplo n.º 6
0
def update_item_key(gkey,oldkey,newkey):
	"""Rename an itemkey in the item table and mirror it in Redis.

	The item table of the group is updated first. When the update reports
	success with a truthy detail, the id cached under oldkey is additionally
	stored under newkey in the Redis item hash.

	Returns the result of get_groups() when the group cannot be resolved,
	err_handle.db_err when the update fails with -1, otherwise the
	(res, des) pair of the update.
	"""
	stat_db = mysqldb.get_db()
	res,desc = get_groups([gkey])
	rdb = rediswrap.get_redis()
	if not (res == 0 and desc):
		return res,desc

	table = get_itm_name(desc[0]['items_mrk'])
	# NOTE(review): oldkey is interpolated into the WHERE clause and the
	# clause is not restricted to this group - confirm both are intended.
	res,des = stat_db.update(table,{"itemkey":newkey},"itemkey='%s'" % oldkey)
	if res == -1:
		return err_handle.db_err
	if res == 0 and des:
		item_id = rdb.hget(RD_ITM_KEY_PRFX+gkey,oldkey)
		if item_id:
			# NOTE(review): the oldkey field stays in the hash - confirm
			# whether it should be removed after the rename.
			rdb.hset(RD_ITM_KEY_PRFX+gkey,newkey,item_id)
	return res,des
Exemplo n.º 7
0
def send(gkey,data):
	"""Accumulate statistics values in the temporary Redis hash of a group.

	data maps itemkey -> increment. Every itemkey with an id in the item
	hash has its increment added to that id in the hash
	RD_ITM_HST_TMP_PRFX + gkey.

	Returns [-7, msg] when the id lookup yields nothing, otherwise
	[0, number of items counted, list of itemkeys without an id].
	"""
	rdb = rediswrap.get_redis()
	keys = list(data.keys())
	ids = rdb.hmget(RD_ITM_KEY_PRFX + gkey,keys)
	if not ids:
		return [-7,"items not find in redis."]

	counted = 0
	unknown = []
	for pos,item_id in enumerate(ids):
		name = keys[pos]
		if not item_id:
			unknown.append(name)
			continue
		rdb.hincrby(RD_ITM_HST_TMP_PRFX+gkey,item_id,data[name])
		counted += 1
	return [0,counted,unknown]
Exemplo n.º 8
0
def sphinx2redis(cnfkey):
	"""Run the sphinx queries of a business configuration and print the results.

	For every entry of bizcnf['prama'] whose source names a sphinx index, the
	configured expression is executed once and the key/value attribute pairs
	of the matches are collected and printed. Writing the pairs to Redis is
	still disabled in this version.
	"""
	# Load the business configuration.
	bizcnf = get_cnf_val(cnfkey,sconf.BIZ)
	if not bizcnf:
		return sconf.err_handle.biznum_not_config
	rdb = rediswrap.get_redis('cache')

	for itm in bizcnf['prama']:
		source = itm['source'].split('.')
		if source[1] != 'sphinx':
			continue
		host_info = get_host_by_data(itm['source'])
		if not host_info:
			return sconf.sphinx_index_not_found

		sp = sphinx(host_info['host'],host_info['port'])
		# The expression dict of the configuration entry is updated in place.
		itm['expression']['index'] = source[2]

		# Paging is not implemented yet: exactly one query per entry.
		sp.initQuery(itm['expression'])
		rs = sp.RunQueries()
		pprint(rs)
		if not (rs and rs[0]['status']==0):
			print(sp._error)
			continue

		first = rs[0]
		total_found = first['total_found']
		key_attr = itm['key']
		val_attr = itm['value']
		_items = {}
		for row in first['matches']:
			attrs = row['attrs']
			if key_attr in attrs and val_attr in attrs:
				_items[attrs[key_attr]] = attrs[val_attr]
		if _items:
			print(_items)
Exemplo n.º 9
0
def del_redis_items(gkey):
	"""Delete the Redis item hash (RD_ITM_KEY_PRFX + gkey) of a group."""
	rediswrap.get_redis().delete(RD_ITM_KEY_PRFX+gkey)
Exemplo n.º 10
0
def mod_redis_mrk(gkey,items_mrk,history_mrk):
	"""Change the items_mrk/history_mrk markers of a group in Redis.

	The markers are stored as "<items_mrk>,<history_mrk>" in the 'mrk' field
	of the hash RD_ITM_KEY_PRFX + gkey.
	"""
	rdb = rediswrap.get_redis()
	# hset overwrites the field; hsetnx would silently keep an existing
	# value and never modify the markers.
	rdb.hset(RD_ITM_KEY_PRFX+gkey,'mrk',"%s,%s" %(items_mrk,history_mrk))
Exemplo n.º 11
0
def cache_set(key,val,ttl=0):
    """Store a gzip-compressed string in the 'cache' Redis instance.

    key: cache key
    val: text to store; it is encoded and gzip-compressed
    ttl: lifetime in seconds; 0 (the default) stores the value without
         an expiry
    """
    rdb = rediswrap.get_redis('cache')
    val = gzip.compress(val.encode())
    if ttl:
        # NOTE(review): (key, value, ttl) is the argument order of the
        # legacy redis-py Redis.setex; redis-py >= 3.0 expects
        # (name, time, value). Confirm which one rediswrap exposes.
        rdb.setex(key,val,ttl)
    else:
        # SETEX rejects a non-positive expiry, so the default ttl must
        # go through a plain SET.
        rdb.set(key,val)
Exemplo n.º 12
0
def cache_get(key):
    """Read a gzip-compressed string from the 'cache' Redis instance.

    Returns the decompressed text, or None when the key does not exist.
    """
    rdb = rediswrap.get_redis('cache')
    raw = rdb.get(key)
    # A cache miss yields None, which gzip.decompress cannot handle.
    if raw is None:
        return None
    return gzip.decompress(raw).decode()
Exemplo n.º 13
0
def gettags_base(kw):
	"""Query the tag table and build a nested tag dictionary.

	Reads biznum, sort, tag_id, lv2rows and lv3rows from kw, looks up the
	business and database configuration, returns a cached value when the key
	"<biznum>.<sort><tag_id><lv2rows><lv3rows>" exists in Redis, and
	otherwise queries the tag table and groups the rows by the length of
	their tag_id relative to the requested tag_id.

	Returns [-6, msg] when biznum is missing, an sconf.err_handle value on
	configuration/database problems, the decompressed cache payload on a
	cache hit, and (0, obj) when rows were found. Nothing is returned
	explicitly when the query yields no rows.
	"""
	biznum = kw.get('biznum','')
	sort = kw.get('sort','pro')
	tag_id = int(kw.get('tag_id',0))
	# Ids shorter than 3 digits (e.g. the default 0) count as "no tag".
	tag_id_len = 0 if len(str(tag_id)) < 3 else len(str(tag_id))
	if not biznum:
		return [-6,'parameter biznum not set.']
	bizcnf = get_cnf_val(biznum,sconf.BIZ)
	# Load the business configuration.
	if not bizcnf:
		return sconf.err_handle.biznum_not_config
	lv2rows = kw.get('lv2rows',bizcnf['lv2rows'])
	lv3rows = kw.get('lv3rows',bizcnf['lv3rows'])
	# NOTE(review): the next two lines use the configured *values* as keys
	# into kw, and neither ttl nor isort is used below - confirm intent.
	ttl     = kw.get(bizcnf['ttl'],600)
	isort   = kw.get(bizcnf['sort'],'total_num')
	# Load the database configuration.
	dbinfo = get_host_by_data(bizcnf['source'])
	if not dbinfo:
		return sconf.err_handle.db_not_config
	dbinfo['dbname']=bizcnf['source'].split('.')[-1]
	rdb = rediswrap.get_redis('cache')
	
	res = rdb.get("%s.%s%s%s%s" %(biznum,sort,tag_id,lv2rows,lv3rows))
	if res:
		# NOTE(review): a cache hit returns the raw decompressed payload,
		# not a (0, obj) pair, and nothing in this function writes the key.
		return gzip.decompress(res)
	db = dbclass(dbinfo)
	res,desc = db.connect()
	
	if res == -1:
		return sconf.err_handle.db_err
	sql_item={}
	sql_item['table'] = bizcnf['table']
	sql_item['fields'] = bizcnf['fields']
	# Restrict the query to the requested tag and the id ranges derived from
	# it; without a tag id (tag_id_len == 0) no WHERE clause is set.
	if tag_id_len ==3:
		sql_item['where'] = "tag_id = %s or (tag_id>%s000 and tag_id <%s000) or (tag_id>%s000000 and tag_id<%s000000) " % (tag_id,tag_id,(tag_id+1),tag_id,(tag_id+1))
	elif tag_id_len >=6:
		sql_item['where'] = "tag_id = %s or (tag_id>%s000 and tag_id <%s000)" % (tag_id,tag_id,(tag_id+1))

	sql_item['order'] = "tag_id asc"
	# Debug output of the request and the generated query.
	pprint.pprint(kw)
	pprint.pprint(sql_item)
	res,desc = db.query(sql_item)

	if res == -1:
		return sconf.err_handle.db_err
	if desc:
		# Without a requested tag the 3-digit ids form the top level.
		tag_id_len = 3 if tag_id_len==0 else tag_id_len
		obj = {}
		for row in desc:
			tid = str(row['tag_id'])
			# Same length as the requested id: top-level node.
			if len(tid)-tag_id_len ==0:
				obj[row['tag_id']]={"info":row}
			# Three digits longer: child of a top-level node.
			elif len(tid) - tag_id_len ==3:
				pid = int(tid[:tag_id_len])
				# NOTE(review): the first child seeds 'extn' with an "info"
				# entry holding that child row, and children are stored as
				# bare rows rather than {"info": row} - confirm the shape.
				if 'extn' not in  obj[pid]:
					obj[pid].update({'extn':{"info":row}})
				obj[pid]['extn'].update({row['tag_id']:row})
			# Six digits longer: grandchild of a top-level node.
			elif len(tid) - tag_id_len==6:

				ppid = int(tid[:tag_id_len])
				pid =  int(tid[:tag_id_len+3])
				# NOTE(review): the grandchild is merged into the child's row
				# dict itself instead of the child's 'extn' - confirm.
				if 'extn' not in  obj[ppid]['extn'][pid]:
					obj[ppid]['extn'][pid].update({'extn':{"info":row}})
				obj[ppid]['extn'][pid].update({row['tag_id']:row})
		#pprint.pprint(obj)
		return 0,obj
Exemplo n.º 14
0
def _sumdelay_clock(mode,now,base_clock):
	"""Return the history timestamp for a group that is due now, else None.

	mode is stat_item_group.sumdelay: 2 hourly, 1 daily, 3 monthly,
	4 weekly. now is a time.struct_time, base_clock the reference epoch
	time of this run.
	"""
	# Hourly groups are due on every run.
	if mode == 2:
		return utils.timestamp(base_clock,'h')
	# All other modes are only due during hour 0.
	if now.tm_hour != 0:
		return None
	if mode == 1:
		return utils.timestamp(0,'d')
	# Monthly: the 1st of the month.
	if mode == 3 and now.tm_mday == 1:
		return utils.timestamp(0,'d')
	# Weekly: Monday, which is tm_wday == 0.
	if mode == 4 and now.tm_wday == 0:
		return utils.timestamp(0,'d')
	return None


def sumdelay():
	"""
	Flush the statistics cached in Redis into the history tables.

	Every temporary hash (stat_base.RD_ITM_HST_TMP_PRFX + gkey) belongs to
	a group; depending on stat_item_group.sumdelay the cached sums of the
	groups that are due are inserted into the group's history table and
	the cache is cleared.
	"""
	rdb = rediswrap.get_redis()
	db = mysqlwrap.get_db()
	now = time.localtime()
	fields = rdb.keys(stat_base.RD_ITM_HST_TMP_PRFX+'*')

	if fields:
		gkeys = [f.split(':')[-1] for f in fields]

		todo_gkeys=[]
		step = 20
		# Reference time of this run, two minutes in the past.
		clock = int(time.time())-120
		for i in range(0,len(gkeys),step):
			# Fetch `step` group records per query.
			ks = ["'%s'" % k for k in gkeys[i:i+step]]
			sql = "select gkey,sumdelay,history_mrk from stat_item_group where gkey in (%s)" % ",".join(ks)
			res, desc = db.query(sql)
			if res==0 and desc:
				for row in desc:
					gk = row['gkey']
					# Computed per row from the reference time, so one
					# group's timestamp never leaks into the next group.
					row_clock = _sumdelay_clock(row['sumdelay'],now,clock)
					if row_clock is None or not gk:
						continue

					key = stat_base.RD_ITM_HST_TMP_PRFX+gk
					# NOTE(review): increments arriving between hgetall and
					# delete are lost; consider renaming the key first.
					r_item = rdb.hgetall(key)
					rdb.delete(key)
					tb = stat_base.get_hst_name(row['history_mrk'])
					if not r_item:
						continue

					logger.info("sum stat group %s from %s into %s" % (gk,key,tb))
					item_ids = list(r_item.keys())
					cnt = 0
					# Write 50 records per insert.
					for start in range(0,len(item_ids),50):
						value = ["('%s','%s','%s')" %(j,r_item[j],row_clock) for j in item_ids[start:start+50]]
						ins_sql = "insert into %s(itemid,val,clock)values%s" % (tb,','.join(value))
						ins_res,ins_desc = db.query(ins_sql,1)
						if ins_res == -1:
							# Keep the failed statement in the data log so it
							# can be replayed.
							logger.error(str(ins_desc))
							data_loge.info(ins_sql)
						else:
							cnt+=len(value)

					logger.info("[%s] count:[%s] ins:[%s] rows." %(gk,len(item_ids),cnt))

				# Write the aggregated data to the database.
				"""
Exemplo n.º 15
0
			except Exception as e:
				logger.info(str(e))
			logger.info("sumdelay end.")
		time.sleep(1)
										
if __name__=="__main__":

	import json

	def _read_conf(path):
		"""Return the text of a configuration file, closing it afterwards."""
		with open(path) as fh:
			return fh.read()

	# All whitespace is stripped from these files before parsing.
	sconf.SYS = json.loads("".join(_read_conf('../conf/sys.json').split()))
	sconf.HOST = json.loads("".join(_read_conf('../conf/host.json').split()))
	sconf.DATA_SOURC = json.loads("".join(_read_conf('../conf/databases.json').split()))
	# biz.json only loses newlines and tabs, so spaces inside its values
	# survive.
	biz_info = json.loads(_read_conf('../conf/biz.json').replace('\n','').replace('\t',''))

	# Set up the database pool and the default Redis connection.
	mysqlwrap.setup_db('default',sconf.SYS['mysql'])
	mysqlwrap.pool_monitor()
	rediswrap.setup_redis('default',sconf.SYS['redis']['host'],sconf.SYS['redis']['port'])

	rdb = rediswrap.get_redis()
	sumdelay()
	
	
Exemplo n.º 16
0
def sphinx2redis_ex(cnfkey):
	"""Count the sphinx matches per tag and store the totals in Redis.

	The tag ids are read from pro_tag (100 < tag_id < 200). For every tag
	the index IDX_com_corp_dist is queried with a filter on the attribute
	that corresponds to the tag level (3 digits: r_com, 6 digits: c_com,
	9 digits: com). Non-zero totals are written to the Redis hash
	'stat2cach.tags_corp'; progress and per-level sums are printed.

	Returns an sconf error value when the configuration, the database or
	the sphinx host is unavailable, otherwise nothing.
	"""
	# Load the business configuration.
	bizcnf = get_cnf_val(cnfkey,sconf.BIZ)
	if not bizcnf:
		return sconf.err_handle.biznum_not_config
	rdb = rediswrap.get_redis('cache')
	dbinfo = get_host_by_data("base.mysql.biz72_product")
	if not dbinfo:
		return sconf.err_handle.db_not_config
	dbinfo['dbname']="biz72_product"
	db  = mysqlwrap.dbclass(dbinfo)
	db.connect()
	sql = "select tag_id from pro_tag where tag_id>100 and tag_id<200"
	res,desc = db.query(sql)
	if res == -1:
		return sconf.err_handle.db_err
	desc = desc or []
	print("rows:",len(desc))
	source="base.sphinx.IDX_com_corp_dist"
	host_info = get_host_by_data(source)
	if not host_info:
		return sconf.sphinx_index_not_found
	sp = sphinx(host_info['host'],host_info['port'])
	prama = {
		"querymod":"SPH_MATCH_EXTENDED2",
		"pageSize":1,
		"page":1,
		"intType":{},
		"index":"IDX_com_corp_dist",
	}
	# Filter attribute and per-level totals, keyed by tag id length.
	field_by_len = {3:"r_com",6:"c_com",9:"com"}
	totals = {3:{},6:{},9:{}}
	st = int(time.time())
	for x,row in enumerate(desc,1):
		if x%100==0:
			print(x,time.time()-st)
		tag_id = row['tag_id']
		tag_len = len(str(tag_id))
		field = field_by_len.get(tag_len)
		# Ids of an unexpected length have no filter attribute; skip them
		# instead of reusing the attribute of a previous row.
		if field is None:
			continue
		# Fresh filter per tag so earlier filters do not accumulate.
		prama['intType'] = {field:str(tag_id)}
		sp.initQuery(prama)
		rs = sp.RunQueries()

		if rs and rs[0]['status']==0:
			total_found = rs[0]['total_found']
			print("tag_id",tag_id,"total_found",total_found)
			if total_found >0:
				rdb.hset('stat2cach.tags_corp',tag_id,total_found)
			totals[tag_len][tag_id] = total_found
		else:
			print(sp._error)

	print("time:",time.time()-st)
	print("itm1","len:",len(totals[3]),"sum:",sum(totals[3].values()))
	print("itm2","len:",len(totals[6]),"sum:",sum(totals[6].values()))
	print("itm3","len:",len(totals[9]),"sum:",sum(totals[9].values()))