def sonFavRate():
    """Print recipe click and favourite rates per column for each daily file.

    Every file in ``/home/zhangzhonghui/data/seqHit/`` whose name starts
    with ``"20"`` is read line by line.  Each data line is parsed with
    ``seqHit.readLine`` into ``(version, column type, record)``.  Records
    with at least one hit on a ``recipeInfo`` son type produce one output
    row and are accumulated per column type; after the file is read, one
    ``all_version`` row is printed per column type.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source; confirm that the accumulation and the per-version row belong
    under the ``selfNum > 0`` guard.
    """
    baseDir = "/home/zhangzhonghui/data/seqHit/"
    print("日期\t版本\t栏目名\t栏目进入数\t列表页菜谱点击数\t菜谱收藏数\t菜谱点击率\t菜谱收藏率\t栏目菜谱收藏率")
    for name in sorted(os.listdir(baseDir)):
        if not name.startswith("20"):
            continue
        # os.listdir() yields bare names, so this normally keeps the name.
        day = name[name.rfind("/") + 1:]
        totals = {}
        # Context manager so every daily file is closed after use.
        with open(baseDir + name) as handle:
            for line in handle:
                # Presumably the header row of the file; skipped.
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                selfNum = 0
                for st in sret.sonTypes:
                    if st in recipeInfo:
                        selfNum += DictUtil.sum(sret.sonTypes[st])
                if selfNum <= 0:
                    continue
                # Reset for every record: without this a record lacking
                # recipeFavMethod reused the previous record's value (or
                # raised NameError on the first record).
                favNum = 0
                if recipeFavMethod in sret.gsonTypes:
                    favNum = DictUtil.sum(sret.gsonTypes[recipeFavMethod])
                if stype not in totals:
                    totals[stype] = [0, 0, 0]
                totals[stype][0] += sret.num
                totals[stype][1] += selfNum
                totals[stype][2] += favNum
                print(day + "\t" + v + "\t" + stype +
                      favStr(sret.num, selfNum, favNum))
        for stype in totals:
            (num, hnum, favNum) = totals[stype]
            print(day + "\tall_version\t" + stype + favStr(num, hnum, favNum))
def count():
    """Split the action log on stdin into per-user runs and print ``cs``.

    Input lines are tab separated.  A line whose second column is ``"-1"``
    starts a new user: the actions collected so far are handed to
    ``output`` and the user info is re-read from that line.  Every other
    line is parsed as an action of the current user.  Lines that cannot be
    used are echoed to stderr.  After the input ends, one
    ``version<TAB>stype<TAB>counter`` row is printed for every entry of the
    module-level ``cs`` table (presumably filled by ``output``).
    """
    allCount = HitRet()
    actions = []
    userinfo = None
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < 2:
            sys.stderr.write(line)
            continue
        if cols[1] == "-1":
            # User boundary: flush the previous user's actions first.
            output(actions)
            actions = []
            userinfo = UserInfo.readUserInfo(cols)
            continue
        if userinfo is None:
            sys.stderr.write("userinfo is None:" + line)
            continue
        action = Action.readAction(cols, userinfo)
        if action is None:
            sys.stderr.write("action is None:" + line)
            continue
        # An id equal to len(actions) is the next free slot; anything
        # larger cannot be stored at its own index and is rejected.
        if action.id > len(actions):
            sys.stderr.write("action.id > len(actions):id%d-len:%d:\t" %
                             (action.id, len(actions)) + line)
            DictUtil.addOne(allCount.wrongs, 1)
            continue
        actions.append(action)
    output(actions)
    for version in cs:
        for stype in cs[version]:
            print("%s\t%s\t%s" % (version, stype, cs[version][stype]))
def addAction(action, actions, ret):
    """Fold one action into the aggregate *ret*.

    Bumps ``ret.num``, records the number of pulled pages, then feeds the
    son-type counts (``getSonTypes``) and grandson-type counts
    (``getGsonTypes``) of the action into ``addTs``.  The "wrong" counters
    returned by both helpers are ignored.
    """
    ret.num += 1
    pulledPages = len(action.pull)
    DictUtil.addOne(ret.pages, pulledPages)

    sonCounts, _sonWrong = getSonTypes(action.id, actions)
    addTs(sonCounts, ret.sonTypes, ret.sons)

    gsonCounts, _gsonWrong = getGsonTypes(action.id, actions)
    addTs(gsonCounts, ret.gsonTypes, ret.gsons)
def sonFavRate():
    """Print recipe click and favourite rates per column for each daily file.

    Every file in ``/home/zhangzhonghui/data/seqHit/`` whose name starts
    with ``"20"`` is read line by line.  Each data line is parsed with
    ``seqHit.readLine`` into ``(version, column type, record)``.  Records
    with at least one hit on a ``recipeInfo`` son type produce one output
    row and are accumulated per column type; after the file is read, one
    ``all_version`` row is printed per column type.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source; confirm that the accumulation and the per-version row belong
    under the ``selfNum > 0`` guard.
    """
    baseDir = "/home/zhangzhonghui/data/seqHit/"
    print("日期\t版本\t栏目名\t栏目进入数\t列表页菜谱点击数\t菜谱收藏数\t菜谱点击率\t菜谱收藏率\t栏目菜谱收藏率")
    for name in sorted(os.listdir(baseDir)):
        if not name.startswith("20"):
            continue
        # os.listdir() yields bare names, so this normally keeps the name.
        day = name[name.rfind("/") + 1:]
        totals = {}
        # Context manager so every daily file is closed after use.
        with open(baseDir + name) as handle:
            for line in handle:
                # Presumably the header row of the file; skipped.
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                selfNum = 0
                for st in sret.sonTypes:
                    if st in recipeInfo:
                        selfNum += DictUtil.sum(sret.sonTypes[st])
                if selfNum <= 0:
                    continue
                # Reset for every record: without this a record lacking
                # recipeFavMethod reused the previous record's value (or
                # raised NameError on the first record).
                favNum = 0
                if recipeFavMethod in sret.gsonTypes:
                    favNum = DictUtil.sum(sret.gsonTypes[recipeFavMethod])
                if stype not in totals:
                    totals[stype] = [0, 0, 0]
                totals[stype][0] += sret.num
                totals[stype][1] += selfNum
                totals[stype][2] += favNum
                print(day + "\t" + v + "\t" + stype +
                      favStr(sret.num, selfNum, favNum))
        for stype in totals:
            (num, hnum, favNum) = totals[stype]
            print(day + "\tall_version\t" + stype + favStr(num, hnum, favNum))
def addTs(sts, totalTypes, sums):
    """Register one set of per-type counts in the running distributions.

    For every ``type -> count`` pair of *sts*, *count* is registered through
    ``DictUtil.addOne`` in the per-type table ``totalTypes[type]`` (created
    on first use).  The sum of all counts is registered the same way in
    *sums*.  Both tables are modified in place; nothing is returned.
    """
    total = 0
    for kind, amount in sts.items():
        total += amount
        perKind = totalTypes.setdefault(kind, {})
        DictUtil.addOne(perKind, amount)
    DictUtil.addOne(sums, total)
def merge(self, other):
    """Add the counters of *other* into this object in place.

    ``num`` is summed, ``pages`` and ``sonNums`` are combined with
    ``DictUtil.merge``, and every three-slot list in ``other.sons`` is added
    slot by slot to the list stored under the same key (a zeroed list is
    created for keys seen for the first time).
    """
    self.num += other.num
    DictUtil.merge(self.pages, other.pages)
    DictUtil.merge(self.sonNums, other.sonNums)
    for key, theirs in other.sons.items():
        mine = self.sons.setdefault(key, [0, 0, 0])
        for slot in range(3):
            mine[slot] += theirs[slot]
def merge(self, other):
    """Add the counters of *other* into this object in place.

    ``num`` is summed, ``pages`` and ``sonNums`` are combined with
    ``DictUtil.merge``, and every three-slot list in ``other.sons`` is added
    slot by slot to the list stored under the same key (a zeroed list is
    created for keys seen for the first time).
    """
    self.num += other.num
    DictUtil.merge(self.pages, other.pages)
    DictUtil.merge(self.sonNums, other.sonNums)
    for key, theirs in other.sons.items():
        mine = self.sons.setdefault(key, [0, 0, 0])
        for slot in range(3):
            mine[slot] += theirs[slot]
def testReadAllDict():
    """Dump the four dictionaries from ``readAllDict`` into ``feedback.log``.

    Standard output is redirected to ``FeedDir + 'feedback.log'`` while the
    dictionaries are loaded and printed, so anything ``readAllDict`` prints
    lands in the log as well.  The previous ``sys.stdout`` is restored and
    the log file closed afterwards, even if loading fails; the original
    left stdout redirected for the rest of the process.
    """
    previousStdout = sys.stdout
    f_handler = open(FeedDir + 'feedback.log', 'w')
    try:
        sys.stdout = f_handler
        classDict, tDict, wDict, eleDict = readAllDict()
        print(DictUtil.dictStr(tDict))
        print(DictUtil.dictStr(wDict))
        print(DictUtil.dictStr(classDict))
        print(DictUtil.dictStr(eleDict))
    finally:
        sys.stdout = previousStdout
        f_handler.close()
def testReadAllDict():
    """Dump the four dictionaries from ``readAllDict`` into ``feedback.log``.

    Standard output is redirected to ``FeedDir + 'feedback.log'`` while the
    dictionaries are loaded and printed, so anything ``readAllDict`` prints
    lands in the log as well.  The previous ``sys.stdout`` is restored and
    the log file closed afterwards, even if loading fails; the original
    left stdout redirected for the rest of the process.
    """
    previousStdout = sys.stdout
    f_handler = open(FeedDir + 'feedback.log', 'w')
    try:
        sys.stdout = f_handler
        classDict, tDict, wDict, eleDict = readAllDict()
        print(DictUtil.dictStr(tDict))
        print(DictUtil.dictStr(wDict))
        print(DictUtil.dictStr(classDict))
        print(DictUtil.dictStr(eleDict))
    finally:
        sys.stdout = previousStdout
        f_handler.close()
def readFeedback(lineOrFeeds, eleDict, begin="0000-00-00 00:00:00", end="9999-99-99 00:00:00"):
    """Count words and adjacent word pairs over a stream of feedback entries.

    Args:
        lineOrFeeds: iterable of ``Feed`` objects or raw lines; raw lines
            are parsed with ``parseFeedLine``.
        eleDict: iterable of entries registered with ``seg.MaxSeg.addW``
            before any text is segmented.
        begin: lower bound compared against ``feed.createTime`` (inclusive).
        end: upper bound compared against ``feed.createTime`` (inclusive).

    Returns:
        The word-count dict.  Each word and each adjacent pair is counted
        at most once per feed.  Pair counts are folded in at the end: added
        to an existing entry, or stored as a new entry only when the pair
        occurred in more than 3 feeds.

    NOTE(review): nesting reconstructed from a whitespace-collapsed source.
    """
    signs = {}
    # Context manager so the sign list is closed once it has been read.
    with open(FeedDir + "sign.txt") as signFile:
        for line in signFile:
            w = line.strip()
            if len(w) == 0:
                continue
            signs[utf8.un(w)] = 1
    signs[u" "] = 1
    signs[u"\t"] = 1

    n = 0
    wordCount = {}
    biCount = {}
    for e in eleDict:
        seg.MaxSeg.addW(e)
    repeats = {}
    for ele in lineOrFeeds:
        n += 1
        if isinstance(ele, Feed):
            feed = ele
        else:
            feed = parseFeedLine(ele, repeats)
        if feed is None or (feed.createTime < begin or feed.createTime > end):
            continue
        ws = seg.MaxSeg.getSeg().maxSeg(feed.content)
        dws = {}  # words and pairs already counted for this feed
        lastW = ""
        bn = 0
        for w in ws:
            if w not in dws:
                DictUtil.addOne(wordCount, w)
                dws[w] = 1
            # Pairs need a predecessor, and neither half may be a sign.
            if bn > 0 and w not in signs and lastW not in signs:
                if lastW + w not in dws:
                    DictUtil.addOne(biCount, lastW + w)
                    dws[lastW + w] = 1
            bn += 1
            lastW = w
        # Echo the first few accepted feeds.
        if n < 10:
            print(feed)
    # Discounted counting of two-word combinations.
    for bi in biCount:
        bc = biCount[bi]
        if bi not in wordCount:
            if bc > 3:
                wordCount[bi] = bc
        else:
            wordCount[bi] += bc
    return wordCount
def ds(uuidFile, today):
    """Print, per registration flag, a histogram of day distances to *today*.

    Args:
        uuidFile: path of a whitespace-separated file; column 0 is a uuid
            and column 2 a day.  Lines with fewer than 4 columns are
            ignored.
        today: day string; also the suffix of the registration file
            ``/home/zhangzhonghui/data/reg/regUser.<today>``, whose first
            two columns are a uuid and a flag.

    For every uuid of the registration file the smallest day found in
    *uuidFile* is kept (string comparison; ``""`` when the uuid never
    appears).  ``TimeUtil.daysDiv(day, today)`` is then tallied per flag.
    For each flag the flag, the ``DictUtil.sum`` of its histogram and the
    histogram itself are printed.
    """
    regUserFile = "/home/zhangzhonghui/data/reg/regUser." + today
    uuids = {}
    with open(regUserFile) as regHandle:
        for line in regHandle:
            cols = line.strip().split()
            uuids[cols[0]] = [cols[1], ""]
    with open(uuidFile) as uuidHandle:
        for line in uuidHandle:
            cols = line.split()
            if len(cols) < 4:
                continue
            uuid = cols[0]
            if uuid not in uuids:
                continue
            day = cols[2]
            known = uuids[uuid][1]
            if known == "" or known > day:
                uuids[uuid][1] = day
    # Named ``hist`` rather than ``ds`` so the function name is not shadowed.
    hist = {}
    for uuid in uuids:
        OK, lastDay = uuids[uuid]
        if OK not in hist:
            hist[OK] = {}
        div = TimeUtil.daysDiv(lastDay, today)
        if div not in hist[OK]:
            hist[OK][div] = 1
        else:
            hist[OK][div] += 1
    for OK in hist:
        print("%s %s" % (OK, DictUtil.sum(hist[OK])))
        print(hist[OK])
def reportSonsFile(method, version, file, ts, dts, v2=100000):
    """Accumulate son-type totals of one seqHit file into *ts* and *dts*.

    Args:
        method: prefix the column type of a line must start with.
        version: lowest accepted version (inclusive).
        file: path of the file to read; also the key of the entry that is
            appended to *dts*.
        ts: dict updated in place; every son type name seen gets value 1.
        dts: list of ``(file, {type name: total})`` tuples updated in
            place.  A tuple for *file* is appended when the first matching
            line is found and the last tuple is not already for *file*.
        v2: highest accepted version (inclusive).

    Lines that ``seqHit.readLine`` cannot parse are echoed to stderr and
    skipped.  A version that is not an integer is treated as 0.
    """
    with open(file) as handle:
        for line in handle:
            # Presumably the header row of the file; skipped.
            if line.startswith("版本"):
                continue
            try:
                (v, stype, sret) = seqHit.readLine(line)
            except Exception:
                # ``except Exception`` (not bare) so Ctrl-C and SystemExit
                # still propagate.
                sys.stderr.write(line)
                continue
            if sret.num + 1e-32 < 1.0:
                continue
            try:
                v = int(v)
            except (TypeError, ValueError):
                v = 0
            if not (v >= version and v <= v2 and stype.startswith(method)):
                continue
            if len(dts) <= 0 or dts[-1][0] != file:
                dts.append((file, {}))
            perFile = dts[-1][1]
            for rawType in sret.sonTypes:
                rate = DictUtil.sum(sret.sonTypes[rawType])
                typeName = hitItemName.getTypeName(rawType)
                if typeName not in perFile:
                    ts[typeName] = 1
                    perFile[typeName] = 0
                perFile[typeName] += rate
def ds(uuidFile, today):
    """Print, per registration flag, a histogram of day distances to *today*.

    Args:
        uuidFile: path of a whitespace-separated file; column 0 is a uuid
            and column 2 a day.  Lines with fewer than 4 columns are
            ignored.
        today: day string; also the suffix of the registration file
            ``/home/zhangzhonghui/data/reg/regUser.<today>``, whose first
            two columns are a uuid and a flag.

    For every uuid of the registration file the smallest day found in
    *uuidFile* is kept (string comparison; ``""`` when the uuid never
    appears).  ``TimeUtil.daysDiv(day, today)`` is then tallied per flag.
    For each flag the flag, the ``DictUtil.sum`` of its histogram and the
    histogram itself are printed.
    """
    regUserFile = "/home/zhangzhonghui/data/reg/regUser." + today
    uuids = {}
    with open(regUserFile) as regHandle:
        for line in regHandle:
            cols = line.strip().split()
            uuids[cols[0]] = [cols[1], ""]
    with open(uuidFile) as uuidHandle:
        for line in uuidHandle:
            cols = line.split()
            if len(cols) < 4:
                continue
            uuid = cols[0]
            if uuid not in uuids:
                continue
            day = cols[2]
            known = uuids[uuid][1]
            if known == "" or known > day:
                uuids[uuid][1] = day
    # Named ``hist`` rather than ``ds`` so the function name is not shadowed.
    hist = {}
    for uuid in uuids:
        OK, lastDay = uuids[uuid]
        if OK not in hist:
            hist[OK] = {}
        div = TimeUtil.daysDiv(lastDay, today)
        if div not in hist[OK]:
            hist[OK][div] = 1
        else:
            hist[OK][div] += 1
    for OK in hist:
        print("%s %s" % (OK, DictUtil.sum(hist[OK])))
        print(hist[OK])
def getSonTypes(id, actions):
    """Count the item names of the sons of ``actions[id]``, following pulls.

    Direct sons are tallied by ``hitItemName.getName``.  Every id listed in
    ``actions[id].pull`` is processed recursively and its tallies merged in
    with ``DictUtil.merge``.  Son or pull ids that are not valid indexes
    into *actions* are counted instead of being followed.

    Returns:
        ``(counts, wrong)``: the tally dict and the number of out-of-range
        ids met, including those met in recursive calls.
    """
    limit = len(actions)
    current = actions[id]
    counts = {}
    bad = 0
    for child in current.sons:
        if child < limit:
            DictUtil.addOne(counts, hitItemName.getName(actions[child]))
        else:
            bad += 1
    for pulled in current.pull:
        if pulled < limit:
            subCounts, subBad = getSonTypes(pulled, actions)
            DictUtil.merge(counts, subCounts)
            bad += subBad
        else:
            bad += 1
    return (counts, bad)
def reportSonsFile(method, version, file, ts, dts, v2=100000):
    """Accumulate son-type totals of one seqHit file into *ts* and *dts*.

    Args:
        method: prefix the column type of a line must start with.
        version: lowest accepted version (inclusive).
        file: path of the file to read; also the key of the entry that is
            appended to *dts*.
        ts: dict updated in place; every son type name seen gets value 1.
        dts: list of ``(file, {type name: total})`` tuples updated in
            place.  A tuple for *file* is appended when the first matching
            line is found and the last tuple is not already for *file*.
        v2: highest accepted version (inclusive).

    Lines that ``seqHit.readLine`` cannot parse are echoed to stderr and
    skipped.  A version that is not an integer is treated as 0.
    """
    with open(file) as handle:
        for line in handle:
            # Presumably the header row of the file; skipped.
            if line.startswith("版本"):
                continue
            try:
                (v, stype, sret) = seqHit.readLine(line)
            except Exception:
                # ``except Exception`` (not bare) so Ctrl-C and SystemExit
                # still propagate.
                sys.stderr.write(line)
                continue
            if sret.num + 1e-32 < 1.0:
                continue
            try:
                v = int(v)
            except (TypeError, ValueError):
                v = 0
            if not (v >= version and v <= v2 and stype.startswith(method)):
                continue
            if len(dts) <= 0 or dts[-1][0] != file:
                dts.append((file, {}))
            perFile = dts[-1][1]
            for rawType in sret.sonTypes:
                rate = DictUtil.sum(sret.sonTypes[rawType])
                typeName = hitItemName.getTypeName(rawType)
                if typeName not in perFile:
                    ts[typeName] = 1
                    perFile[typeName] = 0
                perFile[typeName] += rate
def count():
    """Print daily search/click statistics for tag keywords versus all keywords.

    The tag list is read from a fixed file; every tag is accepted both
    plain and with a ``v45_`` prefix.  ``sys.argv[1]`` names a directory;
    each file in it whose name starts with ``searchKeyword45.2014-`` is
    processed in sorted order.  Within a file, rows whose key starts with
    ``searchKeyword45.CardFix`` are skipped, rows whose key ends with
    ``##total##`` are merged into the overall click record, and rows whose
    key is a tag are merged into the tag click record.  Tag rows that
    ``clickCount.readClick`` cannot parse are echoed to stderr.  One summary
    row is printed per file.

    NOTE(review): nesting reconstructed from a whitespace-collapsed source.
    A file without a ``##total##`` row still fails in the summary, as it
    did before.
    """
    ts = {}
    tagFile = "/home/zhangzhonghui/data/backup.1203/tagQuery.all.txt"
    with open(tagFile) as tagHandle:
        for line in tagHandle:
            tag = line.strip()
            ts[tag] = 1
            ts["v45_" + tag] = 1

    logDir = sys.argv[1]
    for name in sorted(os.listdir(logDir)):
        if not name.startswith("searchKeyword45.2014-"):
            continue
        tck = clickCount.Click()  # clicks of tag keywords
        ack = clickCount.Click()  # clicks of the ##total## rows
        with open(logDir + "/" + name) as handle:
            for line in handle:
                cols = line.strip().split("\t")
                k = cols[0]
                if k.startswith(searchKeyword45.CardFix):
                    continue
                if k.endswith("##total##"):
                    (k, click) = clickCount.readClick(cols)
                    ack.merge(click)
                    continue
                if k not in ts:
                    continue
                try:
                    (k, click) = clickCount.readClick(cols)
                except Exception:
                    # ``except Exception`` (not bare) so Ctrl-C and
                    # SystemExit still propagate.
                    sys.stderr.write(line)
                    continue
                tck.merge(click)

        hit = clickCount.dictSum(tck.hit)
        voidHit = 0
        if 0 in tck.hitCount:
            voidHit = tck.hitCount[0]
        # Avoid dividing by zero when no tag keyword was searched.
        if tck.search == 0:
            tck.search += 1e-3
        tn = float(hit) / tck.search
        an = float(ack.hit[clickCount.SumMark]) / ack.search
        trate = float(tck.search - voidHit) / tck.search
        arate = float(ack.search - ack.hitCount[0]) / ack.search
        print("%s\t%d\t%d\t%d\t%d\t%d\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f" % (
            name[-10:], ack.search, ack.hit[clickCount.SumMark], tck.search,
            hit, (tck.search - voidHit), float(hit) / tck.search, trate,
            arate, tn / an, trate / arate))
def cosineWeit(d1, d2):
    """Return ``DictUtil.cosine`` of two count dicts after tag weighting.

    Keys missing from the module-level ``tagWeit`` table are dropped; the
    value of every remaining key is multiplied by its ``tagWeit`` entry.
    """
    def weighted(vec):
        return dict((key, vec[key] * tagWeit[key])
                    for key in vec if key in tagWeit)

    left = weighted(d1)
    right = weighted(d2)
    return DictUtil.cosine(left, right)
def cosineWeit(d1, d2):
    """Return ``DictUtil.cosine`` of two count dicts after tag weighting.

    Keys missing from the module-level ``tagWeit`` table are dropped; the
    value of every remaining key is multiplied by its ``tagWeit`` entry.
    """
    def weighted(vec):
        return dict((key, vec[key] * tagWeit[key])
                    for key in vec if key in tagWeit)

    left = weighted(d1)
    right = weighted(d2)
    return DictUtil.cosine(left, right)
def __str__(self):
    """Render the action as one tab-separated line.

    Field order: user name, id, time, method, para, response, hit, sons,
    pull, parent id, parent method, ``getItem()`` and the
    ``DictUtil.listStr`` rendering of ``resultItems()``.  ``time``,
    ``method``, ``para`` and ``parentMethod`` are used as they are and must
    already be strings.
    """
    fields = [
        self.user.name(),
        str(self.id),
        self.time,
        self.method,
        self.para,
        str(self.response),
        str(self.hit),
        str(self.sons),
        str(self.pull),
        str(self.parentId),
        self.parentMethod,
        str(self.getItem()),
        DictUtil.listStr(self.resultItems()),
    ]
    return "\t".join(fields)
def merge(self, cc):
    """Combine the show/hit tables of *cc* into this object.

    Each table is replaced by the value ``DictUtil.merge`` returns for the
    pair (own table, table of *cc*).
    """
    tableNames = ("posShow", "posHit", "keywordHit", "keywordShow",
                  "ridShow", "ridHit")
    for tableName in tableNames:
        combined = DictUtil.merge(getattr(self, tableName),
                                  getattr(cc, tableName))
        setattr(self, tableName, combined)
def t7(tag="增长下降比:"):
    """Summarise the numbers that follow *tag* on the lines of stdin.

    On every line containing *tag*, the text between the tag and the next
    ``"="`` (or, failing that, the next space, then the next tab) is
    parsed as a float.  ``DictUtil.statis`` is applied to the collected
    values and a one-line summary is written to stderr.

    Args:
        tag: marker that precedes the value on a line.
    """
    vs = []
    for line in sys.stdin:
        p = line.find(tag)
        if p < 0:
            continue
        start = p + len(tag)
        stop = -1
        for delimiter in ("=", " ", "\t"):
            stop = line.find(delimiter, start)
            if stop >= 0:
                break
        if stop < 0:
            # No delimiter: take the rest of the line.  Slicing up to -1
            # (the old behaviour) cut the last digit off a final line that
            # has no trailing newline.
            stop = len(line)
        vs.append(float(line[start:stop]))
    if not vs:
        # Nothing to summarise; the ratio below would divide by zero.
        sys.stderr.write("no values found\n")
        return
    (total, avg, std, s0) = DictUtil.statis(vs)
    sys.stderr.write(
        "sum:%.4f loss:%d,%.2f avg:%.2f%% std:%.4f avg/std:%.4f\n" %
        (total, s0, float(s0) / len(vs), avg * 100, std, avg / (std + 1e-64)))
def topnRateStr(d, n=500):
    """Format the *n* largest entries of *d* as ``[(key,count,share)...]``.

    Entries are ordered by value, largest first.  ``share`` is the value
    divided by ``DictUtil.sum(d)``; a zero total is replaced by ``1e-32``
    so the division is always defined.
    """
    ranked = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
    total = DictUtil.sum(d)
    if total == 0:
        total += 1e-32
    pieces = []
    for rank, (key, cnt) in enumerate(ranked, 1):
        share = float(cnt) / total
        if rank > n:
            break
        pieces.append("(" + str(key) + ",%d,%.4f" % (cnt, share) + ")")
    return "[" + "".join(pieces) + "]"
def topnRateStr(d, n=500):
    """Format the *n* largest entries of *d* as ``[(key,count,share)...]``.

    Entries are ordered by value, largest first.  ``share`` is the value
    divided by ``DictUtil.sum(d)``; a zero total is replaced by ``1e-32``
    so the division is always defined.
    """
    ranked = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
    total = DictUtil.sum(d)
    if total == 0:
        total += 1e-32
    pieces = []
    for rank, (key, cnt) in enumerate(ranked, 1):
        share = float(cnt) / total
        if rank > n:
            break
        pieces.append("(" + str(key) + ",%d,%.4f" % (cnt, share) + ")")
    return "[" + "".join(pieces) + "]"
def count():
    """Split the action log on stdin into per-user runs and print ``cs``.

    Input lines are tab separated.  A line whose second column is ``"-1"``
    starts a new user: the actions collected so far are handed to
    ``output`` and the user info is re-read from that line.  Every other
    line is parsed as an action of the current user.  Lines that cannot be
    used are echoed to stderr.  After the input ends, one
    ``stype<TAB>version<TAB>counter`` row is printed for every entry of the
    module-level ``cs`` table (presumably filled by ``output``); an empty
    ``stype`` is printed as ``__VOID__``.
    """
    allCount = PosRet()
    actions = []
    userinfo = None
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < 2:
            sys.stderr.write(line)
            continue
        if cols[1] == "-1":
            # User boundary: flush the previous user's actions first.
            output(actions)
            actions = []
            userinfo = UserInfo.readUserInfo(cols)
            continue
        if userinfo is None:
            sys.stderr.write("userinfo is None:" + line)
            continue
        action = Action.readAction(cols, userinfo)
        if action is None:
            sys.stderr.write("action is None:" + line)
            continue
        # An id equal to len(actions) is the next free slot; anything
        # larger cannot be stored at its own index and is rejected.
        if action.id > len(actions):
            sys.stderr.write("action.id > len(actions):id%d-len:%d:\t" %
                             (action.id, len(actions)) + line)
            DictUtil.addOne(allCount.wrongs, 1)
            continue
        actions.append(action)
    output(actions)
    for version in cs:
        for stype in cs[version]:
            cstr = str(cs[version][stype])
            label = stype
            if len(label) == 0:
                label = "__VOID__"
            print("%s\t%s\t%s" % (label, version, cstr))
def favRate():
    """Print the favourite rate of every recipe-info record in the daily files.

    For each file in ``/home/zhangzhonghui/data/seqHit/`` whose name starts
    with ``"20"``, every line is parsed with ``seqHit.readLine``.  Records
    whose column type is in ``recipeInfo`` produce one row: file name,
    version, column type, ``sret.num``, the ``recipeFavMethod`` total among
    the son types, and the ratio of the two.

    NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    this assumes a row is printed for every ``recipeInfo`` record, not only
    for records that contain ``recipeFavMethod``.
    """
    baseDir = "/home/zhangzhonghui/data/seqHit/"
    for name in sorted(os.listdir(baseDir)):
        if not name.startswith("20"):
            continue
        # Context manager so every daily file is closed after use.
        with open(baseDir + name) as handle:
            for line in handle:
                # Presumably the header row of the file; skipped.
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                if stype not in recipeInfo:
                    continue
                selfNum = sret.num
                # Reset for every record: without this a record lacking
                # recipeFavMethod reused the previous record's value (or
                # raised NameError on the first record).
                favNum = 0
                if recipeFavMethod in sret.sonTypes:
                    favNum = DictUtil.sum(sret.sonTypes[recipeFavMethod])
                print(name + "\t" + v + "\t" + stype + "\t%d\t%d\t%.4f" % (
                    selfNum, favNum, favNum / (selfNum + 1e-32)))
def t7(tag="增长下降比:"):
    """Summarise the numbers that follow *tag* on the lines of stdin.

    On every line containing *tag*, the text between the tag and the next
    ``"="`` (or, failing that, the next space, then the next tab) is
    parsed as a float.  ``DictUtil.statis`` is applied to the collected
    values and a one-line summary is written to stderr.

    Args:
        tag: marker that precedes the value on a line.
    """
    vs = []
    for line in sys.stdin:
        p = line.find(tag)
        if p < 0:
            continue
        start = p + len(tag)
        stop = -1
        for delimiter in ("=", " ", "\t"):
            stop = line.find(delimiter, start)
            if stop >= 0:
                break
        if stop < 0:
            # No delimiter: take the rest of the line.  Slicing up to -1
            # (the old behaviour) cut the last digit off a final line that
            # has no trailing newline.
            stop = len(line)
        vs.append(float(line[start:stop]))
    if not vs:
        # Nothing to summarise; the ratio below would divide by zero.
        sys.stderr.write("no values found\n")
        return
    (total, avg, std, s0) = DictUtil.statis(vs)
    sys.stderr.write(
        "sum:%.4f loss:%d,%.2f avg:%.2f%% std:%.4f avg/std:%.4f\n" %
        (total, s0, float(s0) / len(vs), avg * 100, std, avg / (std + 1e-64)))
def favRate():
    """Print the favourite rate of every recipe-info record in the daily files.

    For each file in ``/home/zhangzhonghui/data/seqHit/`` whose name starts
    with ``"20"``, every line is parsed with ``seqHit.readLine``.  Records
    whose column type is in ``recipeInfo`` produce one row: file name,
    version, column type, ``sret.num``, the ``recipeFavMethod`` total among
    the son types, and the ratio of the two.

    NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    this assumes a row is printed for every ``recipeInfo`` record, not only
    for records that contain ``recipeFavMethod``.
    """
    baseDir = "/home/zhangzhonghui/data/seqHit/"
    for name in sorted(os.listdir(baseDir)):
        if not name.startswith("20"):
            continue
        # Context manager so every daily file is closed after use.
        with open(baseDir + name) as handle:
            for line in handle:
                # Presumably the header row of the file; skipped.
                if line.startswith("版本"):
                    continue
                (v, stype, sret) = seqHit.readLine(line)
                if stype not in recipeInfo:
                    continue
                selfNum = sret.num
                # Reset for every record: without this a record lacking
                # recipeFavMethod reused the previous record's value (or
                # raised NameError on the first record).
                favNum = 0
                if recipeFavMethod in sret.sonTypes:
                    favNum = DictUtil.sum(sret.sonTypes[recipeFavMethod])
                print(name + "\t" + v + "\t" + stype + "\t%d\t%d\t%.4f" % (
                    selfNum, favNum, favNum / (selfNum + 1e-32)))
def t3():
    """Summarise the growth rates found on stdin lines starting with ``增长率:``.

    The text between that prefix and the first tab (or the end of the line
    when there is no tab) is parsed as a float.  Unparseable values and
    values whose magnitude exceeds 100 are skipped.  ``DictUtil.statis`` is
    applied to the remaining values and a one-line summary is written to
    stderr.
    """
    prefix = "增长率:"
    vs = []
    for line in sys.stdin:
        if not line.startswith(prefix):
            continue
        p = line.find("\t")
        if p < 0:
            # No tab: take the rest of the line.  Slicing up to -1 (the
            # old behaviour) cut the last digit off a final line that has
            # no trailing newline.
            p = len(line)
        try:
            v = float(line[len(prefix):p])
        except ValueError:
            continue
        # Values beyond +/-100 are discarded.
        if abs(v) > 100:
            continue
        vs.append(v)
    if not vs:
        # Nothing to summarise; the ratio below would divide by zero.
        sys.stderr.write("no values found\n")
        return
    (total, avg, std, s0) = DictUtil.statis(vs)
    # The epsilon keeps avg/std defined when all values are equal.
    sys.stderr.write(
        "sum:%.4f loss:%d,%.2f avg:%.2f%% std:%.4f avg/std:%.4f\n" %
        (total, s0, float(s0) / len(vs), avg * 100, std, avg / (std + 1e-64)))
def addDay(self, user, dayRet):
    """Fold one user's day result into the aggregate counters.

    Registers the user's channel, then registers the day's hit number and
    its last/first day distances through ``DictUtil.addOne``.  For every
    key of ``dayRet.ms``, ``self.mu`` counts one more occurrence and
    ``self.mc`` accumulates the key's value; for every entry of
    ``dayRet.mts``, ``self.tu`` counts one more occurrence.
    """
    self.addChannel(user)
    DictUtil.addOne(self.hits, dayRet.hitNum)
    DictUtil.addOne(self.lastDivs, dayRet.lastDiv)
    DictUtil.addOne(self.firstDivs, dayRet.firstDiv)
    for method, times in dayRet.ms.items():
        if method in self.mu:
            self.mu[method] += 1
            self.mc[method] += times
        else:
            self.mu[method] = 1
            self.mc[method] = times
    for kind in dayRet.mts:
        self.tu[kind] = self.tu.get(kind, 0) + 1
def addDay(self, user, dayRet):
    """Fold one user's day result into the aggregate counters.

    Registers the user's channel, then registers the day's hit number and
    its last/first day distances through ``DictUtil.addOne``.  For every
    key of ``dayRet.ms``, ``self.mu`` counts one more occurrence and
    ``self.mc`` accumulates the key's value; for every entry of
    ``dayRet.mts``, ``self.tu`` counts one more occurrence.
    """
    self.addChannel(user)
    DictUtil.addOne(self.hits, dayRet.hitNum)
    DictUtil.addOne(self.lastDivs, dayRet.lastDiv)
    DictUtil.addOne(self.firstDivs, dayRet.firstDiv)
    for method, times in dayRet.ms.items():
        if method in self.mu:
            self.mu[method] += 1
            self.mc[method] += times
        else:
            self.mu[method] = 1
            self.mc[method] = times
    for kind in dayRet.mts:
        self.tu[kind] = self.tu.get(kind, 0) + 1
def t3():
    """Summarise the growth rates found on stdin lines starting with ``增长率:``.

    The text between that prefix and the first tab (or the end of the line
    when there is no tab) is parsed as a float.  Unparseable values and
    values whose magnitude exceeds 100 are skipped.  ``DictUtil.statis`` is
    applied to the remaining values and a one-line summary is written to
    stderr.
    """
    prefix = "增长率:"
    vs = []
    for line in sys.stdin:
        if not line.startswith(prefix):
            continue
        p = line.find("\t")
        if p < 0:
            # No tab: take the rest of the line.  Slicing up to -1 (the
            # old behaviour) cut the last digit off a final line that has
            # no trailing newline.
            p = len(line)
        try:
            v = float(line[len(prefix):p])
        except ValueError:
            continue
        # Values beyond +/-100 are discarded.
        if abs(v) > 100:
            continue
        vs.append(v)
    if not vs:
        # Nothing to summarise; the ratio below would divide by zero.
        sys.stderr.write("no values found\n")
        return
    (total, avg, std, s0) = DictUtil.statis(vs)
    # The epsilon keeps avg/std defined when all values are equal.
    sys.stderr.write(
        "sum:%.4f loss:%d,%.2f avg:%.2f%% std:%.4f avg/std:%.4f\n" %
        (total, s0, float(s0) / len(vs), avg * 100, std, avg / (std + 1e-64)))
def distribute():
    """Tally version, method and channel of matching log lines on stdin.

    A tab-separated line is counted when it has at least
    ``column.APP_LOG_COLUMNS`` columns, its method is in ``regMs`` and
    ``column.uid`` yields ``""`` or ``None`` for it.  The three tallies are
    printed one after another as ``key<TAB>count`` rows: versions, methods,
    channels.
    """
    versions = {}
    methods = {}
    channels = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < column.APP_LOG_COLUMNS:
            continue
        version = cols[column.VERSION_CID]
        channel = cols[column.MEDIA_CID]
        method = cols[column.METHOD_CID]
        uid = column.uid(cols)
        if uid == "":
            uid = None
        if not (method in regMs and uid == None):
            continue
        DictUtil.addOne(versions, version)
        DictUtil.addOne(methods, method)
        DictUtil.addOne(channels, channel)
    for table in (versions, methods, channels):
        for key in table:
            print("%s\t%d" % (key, table[key]))
def distribute():
    """Tally version, method and channel of matching log lines on stdin.

    A tab-separated line is counted when it has at least
    ``column.APP_LOG_COLUMNS`` columns, its method is in ``regMs`` and
    ``column.uid`` yields ``""`` or ``None`` for it.  The three tallies are
    printed one after another as ``key<TAB>count`` rows: versions, methods,
    channels.
    """
    versions = {}
    methods = {}
    channels = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if len(cols) < column.APP_LOG_COLUMNS:
            continue
        version = cols[column.VERSION_CID]
        channel = cols[column.MEDIA_CID]
        method = cols[column.METHOD_CID]
        uid = column.uid(cols)
        if uid == "":
            uid = None
        if not (method in regMs and uid == None):
            continue
        DictUtil.addOne(versions, version)
        DictUtil.addOne(methods, method)
        DictUtil.addOne(channels, channel)
    for table in (versions, methods, channels):
        for key in table:
            print("%s\t%d" % (key, table[key]))
def merge(self, ck):
    """Combine the click record *ck* into this one.

    ``search`` and ``hasHit`` are summed.  Every show/hit table is replaced
    by the value ``DictUtil.merge`` returns for the pair (own table, table
    of *ck*).  ``ms`` is taken over from *ck* when this record has none,
    otherwise ``ms.merge`` is called with the ``ms`` of *ck*.
    """
    self.search += ck.search
    self.hasHit += ck.hasHit
    tableNames = (
        "weitShow", "posHit", "hit", "show", "posShow",
        "albumShow", "albumHit", "topicShow", "topicHit",
        "foodShow", "foodHit", "hitCount",
        "hitTitleShow", "hitTitleHit",
    )
    for tableName in tableNames:
        combined = DictUtil.merge(getattr(self, tableName),
                                  getattr(ck, tableName))
        setattr(self, tableName, combined)
    if self.ms == None:
        self.ms = ck.ms
    else:
        self.ms.merge(ck.ms)
def dictSum(d):
    """Return the total of *d*.

    The value stored under the ``SumMark`` key is used when that key is
    present; otherwise the total is computed with ``DictUtil.sum``.
    """
    if SumMark in d:
        return d[SumMark]
    return DictUtil.sum(d)
def merge(self, dr):
    """Fold the day result *dr* into this one in place.

    ``userNum`` is summed; the ``channels``, ``hits``, ``firstDivs``,
    ``lastDivs``, ``mc``, ``mu`` and ``tu`` tables are each combined with
    ``DictUtil.merge``.
    """
    self.userNum += dr.userNum
    tableNames = ("channels", "hits", "firstDivs", "lastDivs",
                  "mc", "mu", "tu")
    for tableName in tableNames:
        DictUtil.merge(getattr(self, tableName), getattr(dr, tableName))
def readLine(f):
    """Aggregate ``moreSearch`` rows of *f* and print per-key and per-category rows.

    Args:
        f: iterable of tab-separated lines.  A line is used when it has at
            least 11 columns and the 10th column from the end is
            ``"moreSearch"``.  Counted from the end: ``cols[-9]`` is an
            integer count, ``cols[-3]`` and ``cols[-2]`` are evaluated as
            Python expressions (used as ``category id -> number`` dicts)
            and ``cols[-1]`` is an integer.

    Per key (``cols[0]``) the counts, the two category tables and the last
    column are accumulated, and one row is printed for every key whose
    second table sums to more than 0.  Per category name a pair of totals
    is kept for keys that do not start with ``ck45`` and printed afterwards.

    NOTE(review): nesting reconstructed from a whitespace-collapsed source.
    """
    catePath = "/home/zhangzhonghui/log-mining/com/haodou/log-mining/util/cateidName.txt"
    # Context manager so the category file is closed after loading.
    # NOTE(review): assumes readCateFile reads the file eagerly.
    with open(catePath) as cateFile:
        cates = DBCateName.readCateFile(cateFile)
    counts = {}
    tc = {}
    for line in f:
        if line.find("moreSearch") < 0:
            continue
        cols = line.strip().split("\t")
        if len(cols) < 11 or cols[-10] != "moreSearch":
            continue
        k = cols[0]
        # NOTE(review): eval() executes whatever expression the log column
        # holds; if these columns are always dict literals,
        # ast.literal_eval would be the safer parser.
        ts = eval(cols[-3])
        if type(ts) == int:
            # Diagnostic dump of the unexpected row before the loop below
            # fails on it.
            print(line)
            print(cols[-4])
            print(ts)
        tshow = eval(cols[-2])
        rtn = int(cols[-1])
        if k not in counts:
            counts[k] = [0, 0, {}, {}, 0]
        counts[k][0] += int(cols[-9])
        counts[k][4] += rtn
        tns = counts[k][2]
        tnShow = counts[k][3]
        for t in ts:
            tn = cates[int(t)]
            if tn not in tns:
                tns[tn] = 0
            if tn not in tc:
                tc[tn] = [0, 0]
            if not k.startswith("ck45"):
                tc[tn][0] += ts[t]
            tns[tn] += ts[t]
        showNum = 0
        for t in tshow:
            if tshow[t] > showNum:
                showNum = tshow[t]
            tn = cates[int(t)]
            if tn not in tc:
                tc[tn] = [0, 0]
            if not k.startswith("ck45"):
                tc[tn][1] += int(cols[-9])
            if tn not in tnShow:
                tnShow[tn] = 0
            tnShow[tn] += tshow[t]
        # Only the largest value of the second table is added per line.
        counts[k][1] += showNum
    for k in counts:
        (c, hc, ts, tshow, rtn) = counts[k]
        hitTotal = 0
        for tn in ts:
            hitTotal += ts[tn]
        showTotal = 0
        for tn in tshow:
            showTotal += tshow[tn]
        if showTotal <= 0:
            continue
        print("%s\t%d\t%d\t%.4f\t%d\t%.4f\t%d\t%.4f\t%s\t%s" % (
            k, hitTotal, c, hitTotal / (c + 1e-2), hc, hitTotal / (hc + 1e-2),
            rtn, rtn / (c + 1e-12), DictUtil.dictStr(ts),
            DictUtil.dictStr(tshow)))
    for t in tc:
        (v, s) = tc[t]
        print("%s\t%d\t%d\t%.4f" % (t, v, s, v / float(s + 1e-2)))
def addChannel(self, user):
    """Count one more user and register the user's channel.

    The channel is looked up by ``user.uuid`` in the module-level ``us``
    table; users without a uuid or without an entry are registered under
    the empty string.
    """
    self.userNum += 1
    key = user.uuid
    if key != None and key in us:
        media = us[key]
    else:
        media = ""
    DictUtil.addOne(self.channels, media)
def addHitCount(self, hc):
    """Register the hit count *hc* in ``self.hitCount`` via ``DictUtil.addOne``."""
    tally = self.hitCount
    DictUtil.addOne(tally, hc)
def addChannel(self, user):
    """Count one more user and register the user's channel.

    The channel is looked up by ``user.uuid`` in the module-level ``us``
    table; users without a uuid or without an entry are registered under
    the empty string.
    """
    self.userNum += 1
    key = user.uuid
    if key != None and key in us:
        media = us[key]
    else:
        media = ""
    DictUtil.addOne(self.channels, media)
def addHitTitleHit(self, title, type, keyword):
    """Register a hit for a title/keyword pair of the given type.

    The key is the result of ``hitTitle(title, keyword)`` with *type*
    appended; it is registered in ``self.hitTitleHit`` via
    ``DictUtil.addOne``.
    """
    key = hitTitle(title, keyword) + type
    DictUtil.addOne(self.hitTitleHit, key)
def merge(self, other):
    """Fold the hit result *other* into this one in place.

    ``num`` is summed.  ``pages``, ``sons``, ``gsons`` and ``wrongs`` are
    combined with ``DictUtil.merge``; the nested ``sonTypes`` and
    ``gsonTypes`` tables are combined with ``DictUtil.merge2``.
    """
    self.num += other.num
    steps = (
        (DictUtil.merge, "pages"),
        (DictUtil.merge, "sons"),
        (DictUtil.merge2, "sonTypes"),
        (DictUtil.merge, "gsons"),
        (DictUtil.merge2, "gsonTypes"),
        (DictUtil.merge, "wrongs"),
    )
    for combine, tableName in steps:
        combine(getattr(self, tableName), getattr(other, tableName))
def addAlbumHit(self, album):
    """Register a hit on *album* in ``self.albumHit`` via ``DictUtil.addOne``."""
    tally = self.albumHit
    DictUtil.addOne(tally, album)
def addFoodHit(self, foodId):
    """Register a hit on *foodId* in ``self.foodHit`` via ``DictUtil.addOne``."""
    tally = self.foodHit
    DictUtil.addOne(tally, foodId)
def addSearch(self, keyword, rid, pos):
    """Register one shown result under its keyword, position and id.

    ``DictUtil.addOne`` is applied to ``self.keywordShow``,
    ``self.posShow`` and ``self.ridShow`` in that order.
    """
    registrations = (
        (self.keywordShow, keyword),
        (self.posShow, pos),
        (self.ridShow, rid),
    )
    for table, key in registrations:
        DictUtil.addOne(table, key)
def addTopicHit(self, topic):
    """Register a hit on *topic* in ``self.topicHit`` via ``DictUtil.addOne``."""
    tally = self.topicHit
    DictUtil.addOne(tally, topic)
def combine(f, conf, start=0, end=10000):
    """Replay daily entries of *f* through a loss curve and report returns.

    Args:
        f: iterable of tab-separated lines ``day<TAB>value<TAB>state``
            where ``day`` starts with ``YYYY-MM``.  Lines with fewer than
            three columns are ignored.
        conf: passed through to ``output`` and ``curve.confStr``.
        start: first year to include (inclusive).
        end: last year to include (inclusive).

    The entries of one day are collected as ``(value, factor)`` pairs and
    handed to ``output`` when the day changes.  The factor is derived from
    the parsed state: ``TrendFac`` or its inverse depending on
    ``state.IsBull``, multiplied or divided by ``Close5Fac`` depending on
    ``state.LastClose5``.  Whenever the month or year changes, the relative
    change of ``curve.now`` since the previous boundary is recorded.
    Monthly rates, yearly rates and two summary lines are written to
    stderr.

    NOTE(review): nesting reconstructed from a whitespace-collapsed source.
    """
    lastDay = ""
    curve = loss.Loss(1.0)
    firstMonth = -1
    lastMonth = 0
    lastValue = 1.0
    vs = []   # monthly rates
    yvs = []  # (year, yearly rate)
    lastYear = 0
    lastYearValue = 1.0
    es = []
    state = stepBack.State()
    for line in f:
        cols = line.strip().split("\t")
        if len(cols) < 3:
            continue
        day = cols[0]
        year = int(day.split("-")[0])
        if year < start or year > end:
            continue
        # Months are encoded as YYYYMM integers.
        month = year * 100 + int(day.split("-")[1])
        if firstMonth <= 0:
            firstMonth = month
        if lastMonth <= 0:
            lastMonth = month
            lastYear = year
        if lastMonth != month:
            rate = curve.now / lastValue - 1.0
            sys.stderr.write("%d-%d\t%.2f%%\n" %
                             (lastMonth // 100, lastMonth % 100, rate * 100))
            vs.append(rate)
            lastMonth = month
            lastValue = curve.now
        if lastYear != year:
            rate = curve.now / lastYearValue - 1.0
            yvs.append((lastYear, rate))
            lastYear = year
            lastYearValue = curve.now
        if lastDay == "":
            lastDay = day
        if lastDay != day:
            output(lastDay, es, curve, conf)
            lastDay = day
            es = []
        e = float(cols[1])
        state.read(cols[2])
        if state.IsBull:
            fac = TrendFac
        else:
            fac = 1.0 / TrendFac
        if state.LastClose5:
            fac *= Close5Fac
        else:
            fac /= Close5Fac
        es.append((e, fac))
    if lastDay != "":
        # Close the running month and year, then flush the last day.
        rate = curve.now / lastValue - 1.0
        sys.stderr.write("%d-%d\t%.2f%%\n" %
                         (lastMonth // 100, lastMonth % 100, rate * 100))
        vs.append(rate)
        rate = curve.now / lastYearValue - 1.0
        yvs.append((lastYear, rate))
        output(lastDay, es, curve, conf)
        es = []
    sys.stderr.write("firstMonth:%d\tlastMonth:%d\n" % (firstMonth, lastMonth))
    curve.setTime(dayDiv(firstMonth, lastMonth))
    sys.stderr.write(curve.confStr(conf) + "\n")

    (total, avg, std, s0) = DictUtil.statis(vs)
    mrate = math.pow(curve.now, 1.0 / monthDiv(firstMonth, lastMonth)) - 1.0
    sys.stderr.write(
        "月均统计 sum:%.4f loss:%d,%.2f avg:%.2f%%,%.2f%% std:%.4f avg/std:%.4f\n" %
        (total, s0, float(s0) / len(vs), avg * 100, mrate * 100, std,
         mrate / std))

    rvs = []
    for reportYear, rate in yvs:
        rvs.append(rate)
        sys.stderr.write("%d\t%.2f%%\n" % (reportYear, rate * 100))
    (total, avg, std, s0) = DictUtil.statis(rvs)
    yrate = curve.rate
    # The loss ratio is taken over the yearly list; it used to divide by
    # the number of months.
    sys.stderr.write(
        "年均统计 sum:%.4f loss:%d,%.2f avg:%.2f%%,%.2f%% std:%.4f avg/std:%.4f\n" %
        (total, s0, float(s0) / len(rvs), avg * 100, yrate * 100, std,
         yrate / std))
def addAction(action, actions, ret):
    """Fold one action into the aggregate *ret*.

    Bumps ``ret.num``, registers the number of pulled pages, feeds the
    son-type counts of the action into ``addTs`` together with
    ``ret.sons``, and registers the value ``addTs`` returns in
    ``ret.sonNums``.  The "wrong" counter returned by ``getSonTypes`` is
    ignored.
    """
    ret.num += 1
    pulledPages = len(action.pull)
    DictUtil.addOne(ret.pages, pulledPages)
    sonCounts, _wrong = getSonTypes(action.id, actions)
    DictUtil.addOne(ret.sonNums, addTs(sonCounts, ret.sons))
def addHit(self, keyword, rid, pos):
    """Register one clicked result under its keyword, position and id.

    ``DictUtil.addOne`` is applied to ``self.keywordHit``, ``self.posHit``
    and ``self.ridHit`` in that order.
    """
    registrations = (
        (self.keywordHit, keyword),
        (self.posHit, pos),
        (self.ridHit, rid),
    )
    for table, key in registrations:
        DictUtil.addOne(table, key)
def merge(self, dr):
    """Fold the day result *dr* into this one in place.

    ``userNum`` is summed; the ``channels``, ``hits``, ``firstDivs``,
    ``lastDivs``, ``mc``, ``mu`` and ``tu`` tables are each combined with
    ``DictUtil.merge``.
    """
    self.userNum += dr.userNum
    tableNames = ("channels", "hits", "firstDivs", "lastDivs",
                  "mc", "mu", "tu")
    for tableName in tableNames:
        DictUtil.merge(getattr(self, tableName), getattr(dr, tableName))
def addAction(action, actions, ret):
    """Fold one action into the aggregate *ret*.

    Bumps ``ret.num``, registers the number of pulled pages, feeds the
    son-type counts of the action into ``addTs`` together with
    ``ret.sons``, and registers the value ``addTs`` returns in
    ``ret.sonNums``.  The "wrong" counter returned by ``getSonTypes`` is
    ignored.
    """
    ret.num += 1
    pulledPages = len(action.pull)
    DictUtil.addOne(ret.pages, pulledPages)
    sonCounts, _wrong = getSonTypes(action.id, actions)
    DictUtil.addOne(ret.sonNums, addTs(sonCounts, ret.sons))