Пример #1
0
def sonFavRate():
    """Print click and favourite statistics for every seqHit data file.

    Walks the files in the seqHit data directory whose names start with
    "20" (in sorted order). For each data row it sums ``DictUtil.sum`` over
    the ``sonTypes`` entries whose key is in ``recipeInfo``; when that sum
    is positive and ``recipeFavMethod`` is present in ``gsonTypes`` it
    prints one row for that (file, version, stype) and adds the counts to a
    per-file, per-stype total. After each file, one ``all_version`` row is
    printed per stype with the totals.

    The header columns (translated) are: date, version, column name, column
    entries, list-page recipe clicks, recipe favourites, recipe click rate,
    recipe favourite rate, column recipe favourite rate. ``favStr`` is
    defined outside this block; it presumably renders the count and rate
    columns -- confirm against its definition.
    """
    seq_hit_dir = "/home/zhangzhonghui/data/seqHit/"
    # A single parenthesised argument prints identically as a Python 2
    # statement and as a Python 3 function call.
    print("日期\t版本\t栏目名\t栏目进入数\t列表页菜谱点击数\t菜谱收藏数\t菜谱点击率\t菜谱收藏率\t栏目菜谱收藏率")
    for name in sorted(os.listdir(seq_hit_dir)):
        if not name.startswith("20"):
            continue
        # os.listdir returns bare names, so the file name itself is the day.
        day = name
        totals = {}
        # Context manager closes the handle even if a row fails to parse.
        with open(seq_hit_dir + name) as handle:
            for line in handle:
                # Skip the header row of the data file.
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                selfNum = 0
                for st in sret.sonTypes:
                    if st in recipeInfo:
                        selfNum += DictUtil.sum(sret.sonTypes[st])
                if selfNum <= 0 or recipeFavMethod not in sret.gsonTypes:
                    continue
                favNum = DictUtil.sum(sret.gsonTypes[recipeFavMethod])
                acc = totals.setdefault(stype, [0, 0, 0])
                acc[0] += sret.num
                acc[1] += selfNum
                acc[2] += favNum
                print(day + "\t" + v + "\t" + stype
                      + favStr(sret.num, selfNum, favNum))
        # Per-file summary across all versions.
        for stype in totals:
            (num, hnum, favNum) = totals[stype]
            print(day + "\tall_version\t" + stype + favStr(num, hnum, favNum))
Пример #2
0
def sonFavRate():
	"""Print click and favourite statistics for every seqHit data file.

	Walks the files in the seqHit data directory whose names start with
	"20" (in sorted order). For each data row it sums ``DictUtil.sum`` over
	the ``sonTypes`` entries whose key is in ``recipeInfo``; when that sum
	is positive and ``recipeFavMethod`` is present in ``gsonTypes`` it
	prints one row for that (file, version, stype) and adds the counts to a
	per-file, per-stype total. After each file, one ``all_version`` row is
	printed per stype with the totals.

	The header columns (translated) are: date, version, column name, column
	entries, list-page recipe clicks, recipe favourites, recipe click rate,
	recipe favourite rate, column recipe favourite rate. ``favStr`` is
	defined outside this block; it presumably renders the count and rate
	columns -- confirm against its definition.
	"""
	seq_hit_dir="/home/zhangzhonghui/data/seqHit/"
	# A single parenthesised argument prints identically as a Python 2
	# statement and as a Python 3 function call.
	print("日期\t版本\t栏目名\t栏目进入数\t列表页菜谱点击数\t菜谱收藏数\t菜谱点击率\t菜谱收藏率\t栏目菜谱收藏率")
	for name in sorted(os.listdir(seq_hit_dir)):
		if not name.startswith("20"):
			continue
		# os.listdir returns bare names, so the file name itself is the day.
		day=name
		totals={}
		# Context manager closes the handle even if a row fails to parse.
		with open(seq_hit_dir+name) as handle:
			for line in handle:
				# Skip the header row of the data file.
				if line.startswith("版本"):
					continue
				(v,stype,sret)=seqHit.readLine(line)
				selfNum=0
				for st in sret.sonTypes:
					if st in recipeInfo:
						selfNum+=DictUtil.sum(sret.sonTypes[st])
				if selfNum <= 0 or recipeFavMethod not in sret.gsonTypes:
					continue
				favNum=DictUtil.sum(sret.gsonTypes[recipeFavMethod])
				acc=totals.setdefault(stype,[0,0,0])
				acc[0]+=sret.num
				acc[1]+=selfNum
				acc[2]+=favNum
				print(day+"\t"+v+"\t"+stype+favStr(sret.num,selfNum,favNum))
		# Per-file summary across all versions.
		for stype in totals:
			(num,hnum,favNum)=totals[stype]
			print(day+"\tall_version\t"+stype+favStr(num,hnum,favNum))
Пример #3
0
def reportSonsFile(method, version, file, ts, dts, v2=100000):
    """Accumulate per-type totals from one seqHit file into ``ts`` and ``dts``.

    Args:
        method: prefix a row's stype must start with to be counted.
        version: lowest version (inclusive) to include.
        file: path of the file to read; also stored as the first element
            of the ``dts`` entry for this file.
        ts: dict updated in place; each type name seen is set to 1.
        dts: list of ``(file, {type_name: total})`` tuples, updated in
            place. A new tuple is appended when the last entry is not for
            ``file`` and at least one row matches.
        v2: highest version (inclusive) to include.

    Rows that ``seqHit.readLine`` cannot parse are echoed to stderr and
    skipped. A version that cannot be converted to int is treated as 0.
    """
    # Context manager so the handle is closed even when a row raises.
    with open(file) as handle:
        for line in handle:
            # Skip the header row of the data file.
            if line.startswith("版本"):
                continue
            try:
                (v, stype, sret) = seqHit.readLine(line)
            except Exception:
                # Best-effort parsing: report the bad row and carry on, but
                # let KeyboardInterrupt/SystemExit propagate.
                sys.stderr.write(line)
                continue
            # Ignore rows whose own count is below one.
            if sret.num + 1e-32 < 1.0:
                continue
            try:
                v = int(v)
            except (TypeError, ValueError):
                v = 0
            if not (version <= v <= v2 and stype.startswith(method)):
                continue
            if not dts or dts[-1][0] != file:
                dts.append((file, {}))
            totals = dts[-1][1]
            for raw_type in sret.sonTypes:
                rate = DictUtil.sum(sret.sonTypes[raw_type])
                type_name = hitItemName.getTypeName(raw_type)
                if type_name not in totals:
                    ts[type_name] = 1
                    totals[type_name] = 0
                totals[type_name] += rate
    '''
Пример #4
0
def ds(uuidFile, today):
    """Print, per registration flag, a histogram of day differences.

    Loads ``regUser.<today>`` (first column: uuid, second column: a flag),
    then scans ``uuidFile`` and records for each registered uuid the
    smallest value found in the third column (compared as strings).
    ``TimeUtil.daysDiv`` turns that value and ``today`` into a bucket key.
    For every flag it prints the flag with the total user count, followed
    by the ``{bucket: count}`` dict.

    Args:
        uuidFile: path of a whitespace-separated file with at least four
            columns per row; shorter rows are ignored.
        today: date suffix of the registration file, also passed to
            ``TimeUtil.daysDiv``.
    """
    regUserFile = "/home/zhangzhonghui/data/reg/regUser." + today
    uuids = {}
    with open(regUserFile) as reg:
        for line in reg:
            cols = line.strip().split()
            # Skip blank or truncated rows instead of raising IndexError,
            # matching the length guard used for uuidFile below.
            if len(cols) < 2:
                continue
            uuids[cols[0]] = [cols[1], ""]
    with open(uuidFile) as seen:
        for line in seen:
            cols = line.split()
            if len(cols) < 4:
                continue
            uuid = cols[0]
            if uuid not in uuids:
                continue
            day = cols[2]
            entry = uuids[uuid]
            # Keep the smallest day seen for this uuid.
            if entry[1] == "" or entry[1] > day:
                entry[1] = day
    buckets = {}
    for uuid in uuids:
        # Despite its name, lastDay holds the smallest day recorded above.
        # NOTE(review): uuids never seen in uuidFile still carry "" here and
        # are passed to TimeUtil.daysDiv as-is -- confirm that is intended.
        OK, lastDay = uuids[uuid]
        per_flag = buckets.setdefault(OK, {})
        div = TimeUtil.daysDiv(lastDay, today)
        per_flag[div] = per_flag.get(div, 0) + 1
    for OK in buckets:
        # Same output as the Python 2 statement `print OK, total`.
        print("%s %s" % (OK, DictUtil.sum(buckets[OK])))
        print(buckets[OK])
Пример #5
0
def reportSonsFile(method,version,file,ts,dts,v2=100000):
	"""Accumulate per-type totals from one seqHit file into ``ts`` and ``dts``.

	Args:
		method: prefix a row's stype must start with to be counted.
		version: lowest version (inclusive) to include.
		file: path of the file to read; also stored as the first element
			of the ``dts`` entry for this file.
		ts: dict updated in place; each type name seen is set to 1.
		dts: list of ``(file, {type_name: total})`` tuples, updated in
			place. A new tuple is appended when the last entry is not for
			``file`` and at least one row matches.
		v2: highest version (inclusive) to include.

	Rows that ``seqHit.readLine`` cannot parse are echoed to stderr and
	skipped. A version that cannot be converted to int is treated as 0.
	"""
	# Context manager so the handle is closed even when a row raises.
	with open(file) as handle:
		for line in handle:
			# Skip the header row of the data file.
			if line.startswith("版本"):
				continue
			try:
				(v,stype,sret)=seqHit.readLine(line)
			except Exception:
				# Best-effort parsing: report the bad row and carry on, but
				# let KeyboardInterrupt/SystemExit propagate.
				sys.stderr.write(line)
				continue
			# Ignore rows whose own count is below one.
			if sret.num+1e-32 < 1.0:
				continue
			try:
				v=int(v)
			except (TypeError,ValueError):
				v=0
			if not (version <= v <= v2 and stype.startswith(method)):
				continue
			if not dts or dts[-1][0] != file:
				dts.append((file,{}))
			totals=dts[-1][1]
			for raw_type in sret.sonTypes:
				rate=DictUtil.sum(sret.sonTypes[raw_type])
				type_name=hitItemName.getTypeName(raw_type)
				if type_name not in totals:
					ts[type_name]=1
					totals[type_name]=0
				totals[type_name]+=rate
	'''
Пример #6
0
def ds(uuidFile,today):
	"""Print, per registration flag, a histogram of day differences.

	Loads ``regUser.<today>`` (first column: uuid, second column: a flag),
	then scans ``uuidFile`` and records for each registered uuid the
	smallest value found in the third column (compared as strings).
	``TimeUtil.daysDiv`` turns that value and ``today`` into a bucket key.
	For every flag it prints the flag with the total user count, followed
	by the ``{bucket: count}`` dict.

	Args:
		uuidFile: path of a whitespace-separated file with at least four
			columns per row; shorter rows are ignored.
		today: date suffix of the registration file, also passed to
			``TimeUtil.daysDiv``.
	"""
	regUserFile="/home/zhangzhonghui/data/reg/regUser."+today
	uuids={}
	with open(regUserFile) as reg:
		for line in reg:
			cols=line.strip().split()
			# Skip blank or truncated rows instead of raising IndexError,
			# matching the length guard used for uuidFile below.
			if len(cols) < 2:
				continue
			uuids[cols[0]]=[cols[1],""]
	with open(uuidFile) as seen:
		for line in seen:
			cols=line.split()
			if len(cols) < 4:
				continue
			uuid=cols[0]
			if uuid not in uuids:
				continue
			day=cols[2]
			entry=uuids[uuid]
			# Keep the smallest day seen for this uuid.
			if entry[1] == "" or entry[1] > day:
				entry[1]=day
	buckets={}
	for uuid in uuids:
		# Despite its name, lastDay holds the smallest day recorded above.
		# NOTE(review): uuids never seen in uuidFile still carry "" here and
		# are passed to TimeUtil.daysDiv as-is -- confirm that is intended.
		OK,lastDay=uuids[uuid]
		per_flag=buckets.setdefault(OK,{})
		div=TimeUtil.daysDiv(lastDay,today)
		per_flag[div]=per_flag.get(div,0)+1
	for OK in buckets:
		# Same output as the Python 2 statement `print OK, total`.
		print("%s %s"%(OK,DictUtil.sum(buckets[OK])))
		print(buckets[OK])
Пример #7
0
def count():
	"""Print one summary row per searchKeyword45 log file in ``sys.argv[1]``.

	Loads the tag list (each tag is accepted both bare and with a ``v45_``
	prefix), then for every file named ``searchKeyword45.2014-*`` in the
	directory given as the first command-line argument:

	* rows whose key starts with ``searchKeyword45.CardFix`` are ignored;
	* rows whose key ends with ``##total##`` are merged into the overall
	  click record;
	* rows whose key is a known tag are merged into the tag click record
	  (rows ``clickCount.readClick`` cannot parse go to stderr).

	The printed row holds the last 10 characters of the file name, the
	overall search and hit counts, the tag search and hit counts, the tag
	searches minus zero-hit searches, and five ratios derived from them.
	"""
	tagFile="/home/zhangzhonghui/data/backup.1203/tagQuery.all.txt"
	ts={}
	with open(tagFile) as tags:
		for line in tags:
			tag=line.strip()
			ts[tag]=1
			ts["v45_"+tag]=1
	log_dir=sys.argv[1]
	for name in sorted(os.listdir(log_dir)):
		if not name.startswith("searchKeyword45.2014-"):
			continue
		# Accumulators are only built for files that are actually read.
		tck=clickCount.Click()
		ack=clickCount.Click()
		with open(log_dir+"/"+name) as rows:
			for line in rows:
				cols=line.strip().split("\t")
				k=cols[0]
				if k.startswith(searchKeyword45.CardFix):
					continue
				if k.endswith("##total##"):
					(k,click)=clickCount.readClick(cols)
					ack.merge(click)
					continue
				if k not in ts:
					continue
				try:
					(k,click)=clickCount.readClick(cols)
				except Exception:
					# Best-effort: report the unparsable row and move on.
					sys.stderr.write(line)
					continue
				tck.merge(click)
		hit=clickCount.dictSum(tck.hit)
		voidHit=0
		if 0 in tck.hitCount:
			voidHit=tck.hitCount[0]
		# Avoid dividing by zero when no tag row matched in this file.
		if tck.search == 0:
			tck.search+=1e-3
		# NOTE(review): the overall figures below are not guarded; a file
		# without a usable ##total## row may raise here -- confirm the
		# defaults of clickCount.Click before relying on this.
		tn=float(hit)/tck.search
		an=float(ack.hit[clickCount.SumMark])/ack.search
		trate=float(tck.search-voidHit)/tck.search
		arate=float(ack.search-ack.hitCount[0])/ack.search
		print("%s\t%d\t%d\t%d\t%d\t%d\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f"%(name[-10:],ack.search,ack.hit[clickCount.SumMark],tck.search,hit,(tck.search-voidHit),tn,trate,arate,tn/an,trate/arate))
Пример #8
0
def topnRateStr(d,n=500):
	"""Render the ``n`` largest entries of ``d`` as ``[(key,count,share)...]``.

	Entries are ordered by value, largest first. ``share`` is the value
	divided by ``DictUtil.sum(d)``, printed with four decimals; a zero total
	is replaced by a tiny positive number so the division cannot fail.

	Args:
		d: dict mapping keys to numeric counts.
		n: maximum number of entries to include.

	Returns:
		The formatted string; "[]" when nothing is included.
	"""
	# `total` rather than `sum`, to leave the builtin untouched.
	total=DictUtil.sum(d)
	if total == 0:
		total+=1e-32
	ranked=sorted(d.items(),key=lambda e:e[1],reverse=True)
	parts=[]
	for rank,(w,c) in enumerate(ranked,1):
		if rank > n:
			break
		parts.append("("+str(w)+",%d,%.4f)"%(c,float(c)/total))
	return "["+"".join(parts)+"]"
Пример #9
0
def topnRateStr(d, n=500):
    """Render the ``n`` largest entries of ``d`` as ``[(key,count,share)...]``.

    Entries are ordered by value, largest first. ``share`` is the value
    divided by ``DictUtil.sum(d)``, printed with four decimals; a zero total
    is replaced by a tiny positive number so the division cannot fail.

    Args:
        d: dict mapping keys to numeric counts.
        n: maximum number of entries to include.

    Returns:
        The formatted string; "[]" when nothing is included.
    """
    # `total` rather than `sum`, to leave the builtin untouched.
    total = DictUtil.sum(d)
    if total == 0:
        total += 1e-32
    ranked = sorted(d.items(), key=lambda e: e[1], reverse=True)
    parts = []
    for rank, (w, c) in enumerate(ranked, 1):
        if rank > n:
            break
        parts.append("(" + str(w) + ",%d,%.4f)" % (c, float(c) / total))
    return "[" + "".join(parts) + "]"
Пример #10
0
def favRate():
	"""Print the favourite rate of every recipe-type row in the seqHit files.

	Walks the files in the seqHit data directory whose names start with
	"20" (in sorted order). For each data row whose stype is in
	``recipeInfo`` and whose ``sonTypes`` contain ``recipeFavMethod``, prints
	a tab-separated row: file name, version, stype, the row's own count, the
	favourite count, and favourite count divided by own count.
	"""
	seq_hit_dir="/home/zhangzhonghui/data/seqHit/"
	for name in sorted(os.listdir(seq_hit_dir)):
		if not name.startswith("20"):
			continue
		# Context manager closes the handle even if a row fails to parse.
		with open(seq_hit_dir+name) as handle:
			for line in handle:
				# Skip the header row of the data file.
				if line.startswith("版本"):
					continue
				(v,stype,sret)=seqHit.readLine(line)
				if stype not in recipeInfo:
					continue
				if recipeFavMethod not in sret.sonTypes:
					continue
				selfNum=sret.num
				favNum=DictUtil.sum(sret.sonTypes[recipeFavMethod])
				# The tiny epsilon keeps the rate defined when selfNum is 0.
				counts="\t%d\t%d\t%.4f"%(selfNum,favNum,favNum/(selfNum+1e-32))
				print(name+"\t"+v+"\t"+stype+counts)
Пример #11
0
def favRate():
    """Print the favourite rate of every recipe-type row in the seqHit files.

    Walks the files in the seqHit data directory whose names start with
    "20" (in sorted order). For each data row whose stype is in
    ``recipeInfo`` and whose ``sonTypes`` contain ``recipeFavMethod``, prints
    a tab-separated row: file name, version, stype, the row's own count, the
    favourite count, and favourite count divided by own count.
    """
    seq_hit_dir = "/home/zhangzhonghui/data/seqHit/"
    for name in sorted(os.listdir(seq_hit_dir)):
        if not name.startswith("20"):
            continue
        # Context manager closes the handle even if a row fails to parse.
        with open(seq_hit_dir + name) as handle:
            for line in handle:
                # Skip the header row of the data file.
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                if stype not in recipeInfo:
                    continue
                if recipeFavMethod not in sret.sonTypes:
                    continue
                selfNum = sret.num
                favNum = DictUtil.sum(sret.sonTypes[recipeFavMethod])
                # The tiny epsilon keeps the rate defined when selfNum is 0.
                counts = "\t%d\t%d\t%.4f" % (
                    selfNum, favNum, favNum / (selfNum + 1e-32))
                print(name + "\t" + v + "\t" + stype + counts)
Пример #12
0
def dictSum(d):
	"""Return ``d[SumMark]`` when that key exists, else ``DictUtil.sum(d)``."""
	if SumMark in d:
		# A stored total takes precedence over summing the dict.
		return d[SumMark]
	return DictUtil.sum(d)