def count():
    """Print IP/uuid cross-cardinality statistics for the log on stdin.

    Every stdin line is split on tabs; rows rejected by ``column.valid`` are
    skipped.  The distinct (ip, uuid) pairs of the remaining rows are
    collected and two tab-separated summary lines are printed:

    * distinct IPs, number of (ip, uuid) pairs, mean uuids per IP
    * distinct uuids, number of (uuid, ip) pairs, mean IPs per uuid

    When no valid row is read the means are reported as 0.0000 instead of
    raising ZeroDivisionError.
    """
    uuids_by_ip = {}
    ips_by_uuid = {}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if not column.valid(cols):
            continue
        uuid = column.uuid(cols)
        ip = cols[column.IP_CID]
        # The inner dicts are used as sets, so a repeated pair counts once.
        uuids_by_ip.setdefault(ip, {})[uuid] = 1
        ips_by_uuid.setdefault(uuid, {})[ip] = 1

    # Totals get their own names so the builtin sum() is not shadowed.
    ip_pairs = sum(len(seen) for seen in uuids_by_ip.values())
    uuids_per_ip = float(ip_pairs) / len(uuids_by_ip) if uuids_by_ip else 0.0
    print("ipNum\t%d\tipUuidNum\t%d\tuuidPerIp\t%.4f"
          % (len(uuids_by_ip), ip_pairs, uuids_per_ip))

    uuid_pairs = sum(len(seen) for seen in ips_by_uuid.values())
    ips_per_uuid = float(uuid_pairs) / len(ips_by_uuid) if ips_by_uuid else 0.0
    # NOTE(review): the "ipPerUuuid" label is kept exactly as emitted before,
    # in case something downstream matches on it.
    print("uuidNum\t%d\tuuidIpNum\t%d\tipPerUuuid\t%.4f"
          % (len(ips_by_uuid), uuid_pairs, ips_per_uuid))
def parseLine(line, type="appLog"):
    """Turn one tab-separated log line into an ``(item, user)`` pair.

    ``type`` selects how the line is interpreted:

    * ``"userSort"`` -- action and user come from
      ``sequence.getActionUser(cols, 0)``; the item is
      ``hitItemName.getPosName(action)`` and the user is ``user.name()``.
    * ``"appLog"`` -- the row must pass ``column.valid``; the item is packed
      from the method name (with ``SplitSign + "0"`` appended when the
      ``offset`` parameter equals ``"0"``) and the action item, and the user
      is ``column.uuid(cols)``.

    Returns ``(None, None)`` for any other ``type`` and whenever the row is
    rejected or the action/user is missing.
    """
    nothing = (None, None)
    fields = line.strip().split("\t")

    if type == "userSort":
        action, user = sequence.getActionUser(fields, 0)
        if action == None or user == None:
            return nothing
        return (hitItemName.getPosName(action), user.name())

    if type != "appLog":
        return nothing
    if not column.valid(fields):
        return nothing

    method = fields[column.METHOD_CID]
    params = fields[column.PARA_ID]
    version = column.intVersion(fields[column.VERSION_CID])
    action_item = getActionItem.getActionItem(method, params, version)
    label = hitItemName.getMethodName(method, params)
    # Requests with offset "0" get their own name variant.
    if column.getValue(params, "offset") == "0":
        label += hitItemName.SplitSign + "0"
    packed = hitItemName.packItem(label, action_item)
    return (packed, column.uuid(fields))
def t2():
    """Compare per-method hit counts of listed users against all other users.

    The listed users are the third whitespace-separated column of every line
    of ``/home/zhangzhonghui/data/In.txt``.  Log rows are then read from
    stdin (tab-separated, filtered through ``column.valid``) and their method
    is counted either for the listed users or for everyone else.

    For every method the listed users hit more than 100 times, one line is
    printed: method, listed-user hits, other-user hits and the ratio
    ``hits * len(others) / other_hits / len(listed)``.  A final line reports
    the user counts.
    """
    listed = {}       # users named in In.txt
    others = {}       # users seen on stdin that are not listed
    listed_hits = {}  # method -> hits by listed users
    other_hits = {}   # method -> hits by other users

    # The context manager closes the list file once it has been read.
    with open("/home/zhangzhonghui/data/In.txt") as handle:
        for line in handle:
            cols = line.strip().split()
            listed[cols[2]] = {}

    for line in sys.stdin:
        cols = line.strip().split("\t")
        if not column.valid(cols):
            continue
        uuid = column.uuid(cols)
        method = cols[column.METHOD_CID]
        if uuid in listed:
            listed_hits[method] = listed_hits.get(method, 0) + 1
        else:
            others[uuid] = 1
            other_hits[method] = other_hits.get(method, 0) + 1

    for method in listed_hits:
        hits = listed_hits[method]
        if hits <= 100:
            continue
        other = other_hits.get(method, 0)
        # The 1e-12 term keeps the division defined when nobody else hit
        # this method.
        ratio = (float(hits) * len(others) / float(other + 1e-12)
                 / float(len(listed)))
        print("%s %s %s %s" % (method, hits, other, ratio))

    # NOTE(review): the value printed after "notIn user:" was masked
    # ("******") in the source this was restored from; len(listed) is the
    # assumed original operand -- confirm against history.
    print(" ".join(["notIn user:", str(len(listed)),
                    "other user:", str(len(others))]))
def t2():
    """Report which methods listed users call, relative to all other users.

    Listed users are taken from the third whitespace-separated column of each
    line in ``/home/zhangzhonghui/data/In.txt``.  Tab-separated log rows from
    stdin that pass ``column.valid`` are tallied per method, separately for
    listed users and for the rest.

    Prints one line per method with more than 100 listed-user hits (method,
    listed hits, other hits, ``hits * len(others) / other_hits /
    len(listed)``), followed by a line with the two user counts.
    """
    in_users = {}
    out_users = {}
    in_methods = {}
    out_methods = {}

    # Read the user list inside "with" so the file handle is released.
    with open("/home/zhangzhonghui/data/In.txt") as user_file:
        for row in user_file:
            in_users[row.strip().split()[2]] = {}

    for row in sys.stdin:
        cols = row.strip().split("\t")
        if not column.valid(cols):
            continue
        uuid = column.uuid(cols)
        method = cols[column.METHOD_CID]
        if uuid not in in_users:
            out_users[uuid] = 1
            tally = out_methods
        else:
            tally = in_methods
        tally[method] = tally.get(method, 0) + 1

    for method in in_methods:
        inside = in_methods[method]
        outside = out_methods.get(method, 0)
        if inside > 100:
            # 1e-12 guards against dividing by zero other-user hits.
            ratio = (float(inside) * len(out_users) / float(outside + 1e-12)
                     / float(len(in_users)))
            print("%s %s %s %s" % (method, inside, outside, ratio))

    # NOTE(review): the operand after "notIn user:" arrived masked as
    # "******" (a syntax error); len(in_users) is an assumed reconstruction
    # -- verify against the original script.
    print("%s %s %s %s" % ("notIn user:", len(in_users),
                           "other user:", len(out_users)))
def countSearch(file):
    """Print the tag behind every ``search.getlist`` request on stdin.

    ``file`` is handed to ``readCates``; the result is used as a lookup from
    search keyword to the value printed for it.

    Only lines containing ``search.getlist`` that pass ``column.valid`` are
    considered, and requests whose ``scene`` parameter is ``"t2"`` are
    ignored.  When the scene is ``"t1"`` and a non-empty ``tagid`` is present
    the tag id itself is printed; otherwise the ``keyword`` parameter is
    looked up and its mapped value printed when known.
    """
    tags = readCates(file)
    for raw in sys.stdin:
        # Cheap substring filter before the line is split.
        if "search.getlist" not in raw:
            continue
        fields = raw.strip().split("\t")
        if not column.valid(fields):
            continue

        scene = column.getValue(fields[column.PARA_ID], "scene")
        if scene == "t2":
            continue

        tag_id = column.getValue(fields[column.PARA_ID], "tagid")
        has_tag = tag_id != None and tag_id != ""
        if has_tag and scene == "t1":
            print(tag_id)
            continue

        word = column.getValue(fields[column.PARA_ID], "keyword")
        if word != None and word in tags:
            print(tags[word])
def qid(f):
    """Prefix each log line from ``f`` with a key and print it tab-separated.

    Three line shapes are recognised after stripping whitespace:

    * lines starting with ``{"status":`` -- printed as
      ``request_id<TAB>line`` when ``column.getQid2`` finds a non-empty
      request id, otherwise dropped;
    * lines starting with ``{"remote_addr":"`` -- printed as
      ``rand<TAB>ip<TAB>log_time<TAB>line`` when ``mlog.getU`` yields a
      non-empty value, otherwise dropped;
    * anything else is treated as a tab-separated row: rows failing
      ``column.valid`` are dropped, the rest are keyed by their
      ``request_id`` parameter, or by a random key when it is missing.

    ``rand`` is ``random.randint(0, 99)``.
    """
    for line in f:
        line = line.strip()
        if line.startswith('{"status":'):
            q = column.getQid2(line, "request_id")
            if q is not None and q != "":
                print(q + "\t" + line)
        elif line.startswith('{"remote_addr":"'):
            ip = mlog.getU(line)
            log_time = mlog.value(line, '"log_time":')
            if ip is not None and ip != "":
                # line is already stripped, so it is emitted as-is.
                print(str(random.randint(0, 99)) + "\t" + ip + "\t"
                      + log_time + "\t" + line)
        else:
            cols = line.split("\t")
            if not column.valid(cols):
                continue
            q = column.getValue(cols[column.PARA_ID], "request_id")
            if q is not None and q != "":
                key = q
            else:
                # presumably spreads id-less rows over 100 buckets -- confirm
                key = str(random.randint(0, 99))
            print(key + "\t" + line)
def appLogHit(aid):
    """Count ``info.getalbuminfo`` requests for album ``aid`` per app id.

    Reads tab-separated rows from stdin and keeps those that pass
    ``column.valid``, have method ``info.getalbuminfo``, an integer version
    of at least 400 and an ``aid`` parameter equal to ``aid``.  Prints one
    ``appid<TAB>count`` line per app id seen.
    """
    hits_by_app = {}
    for raw in sys.stdin:
        fields = raw.strip().split("\t")
        if not column.valid(fields):
            continue
        if fields[column.METHOD_CID] != "info.getalbuminfo":
            continue
        if column.intVersion(fields[column.VERSION_CID]) < 400:
            continue
        if column.getValue(fields[column.PARA_ID], "aid") != aid:
            continue
        app = fields[column.APPID_CID]
        hits_by_app[app] = hits_by_app.get(app, 0) + 1

    for app in hits_by_app:
        print("%s\t%d" % (app, hits_by_app[app]))
def appLogHit(aid):
    """Tally album-info requests for ``aid`` by app id and print the tallies.

    A stdin row counts when it passes ``column.valid``, its method is
    ``info.getalbuminfo``, its integer version is not below 400 and its
    ``aid`` parameter equals ``aid``.  Output is one ``appid<TAB>count``
    line per app id.
    """
    tally = {}
    for line in sys.stdin:
        row = line.strip().split("\t")
        if not column.valid(row) or row[column.METHOD_CID] != "info.getalbuminfo":
            continue
        release = column.intVersion(row[column.VERSION_CID])
        if release < 400:
            continue
        album = column.getValue(row[column.PARA_ID], "aid")
        if album != aid:
            continue
        source = row[column.APPID_CID]
        if source in tally:
            tally[source] += 1
        else:
            tally[source] = 1

    for source in tally:
        count_line = "%s\t%d" % (source, tally[source])
        print(count_line)
def count():
    """Summarise how IPs and uuids map onto each other in the stdin log.

    Rows are tab-separated and must pass ``column.valid``.  Two lines are
    printed, each holding a distinct-key count, the number of distinct
    (key, other) pairs and the mean number of others per key: first keyed by
    IP, then keyed by uuid.

    With no valid input the means are printed as 0.0000 rather than failing
    with ZeroDivisionError.
    """
    by_ip = {}    # ip -> {uuid: 1}
    by_uuid = {}  # uuid -> {ip: 1}
    for line in sys.stdin:
        cols = line.strip().split("\t")
        if not column.valid(cols):
            continue
        uuid = column.uuid(cols)
        ip = cols[column.IP_CID]
        if ip not in by_ip:
            by_ip[ip] = {}
        if uuid not in by_uuid:
            by_uuid[uuid] = {}
        by_ip[ip][uuid] = 1
        by_uuid[uuid][ip] = 1

    # NOTE(review): the "ipPerUuuid" label is reproduced exactly as before,
    # since consumers of this output may match on it.
    reports = (
        ("ipNum\t%d\tipUuidNum\t%d\tuuidPerIp\t%.4f", by_ip),
        ("uuidNum\t%d\tuuidIpNum\t%d\tipPerUuuid\t%.4f", by_uuid),
    )
    for template, table in reports:
        # "total" instead of "sum" keeps the builtin available.
        total = sum(len(inner) for inner in table.values())
        mean = float(total) / len(table) if table else 0.0
        print(template % (len(table), total, mean))
def qid(f):
    """Print every usable line of ``f`` behind a tab-separated key.

    * ``{"status":`` lines are keyed by the request id from
      ``column.getQid2`` and skipped when that is missing or empty.
    * ``{"remote_addr":"`` lines are keyed by a random integer in 0..99,
      followed by the value from ``mlog.getU`` and the ``"log_time":``
      value; they are skipped when ``mlog.getU`` returns nothing.
    * All other lines are split on tabs, dropped if ``column.valid`` rejects
      them, and keyed by their ``request_id`` parameter, falling back to a
      random integer in 0..99.
    """
    for raw in f:
        text = raw.strip()

        if text.startswith('{"status":'):
            request = column.getQid2(text, "request_id")
            if request is None or request == "":
                continue
            print(request + "\t" + text)
            continue

        if text.startswith('{"remote_addr":"'):
            ip = mlog.getU(text)
            stamp = mlog.value(text, '"log_time":')
            if ip is None or ip == "":
                continue
            # text is already stripped; no second strip() is needed.
            print(str(random.randint(0, 99)) + "\t" + ip + "\t" + stamp
                  + "\t" + text)
            continue

        cols = text.split("\t")
        if not column.valid(cols):
            continue
        request = column.getValue(cols[column.PARA_ID], "request_id")
        if request is None or request == "":
            # Rows without a request id get a random key instead.
            request = str(random.randint(0, 99))
        print(request + "\t" + text)
def map():
    """Mapper: collect per-user uuid/uid, channel and IP items from stdin.

    Rows that already have ``Reduce_Cols`` columns and whose second column is
    in ``types`` are earlier results; they are echoed unchanged so they can
    be accumulated again.  Other rows must pass ``column.valid``.

    For a row with a non-empty uid, its uuid, channel and IP are recorded
    through ``addItem2`` under the key ``Uid_Fix + uid``; for a row with a
    non-empty uuid, its uid, channel and IP are recorded under
    ``Uuid_Fix + uuid``.  At the end every collected key is passed to
    ``output`` together with its data.
    """
    collected = {}
    for raw in sys.stdin:
        stripped = raw.strip()
        cols = stripped.split("\t")
        # Previously accumulated data: pass it through as-is.
        if len(cols) == Reduce_Cols and cols[1] in types:
            print(stripped)
            continue
        if not column.valid(cols):
            continue

        uid = column.uid(cols)
        uuid = column.uuidOnly(cols)
        ip = cols[column.IP_CID]
        stamp = cols[column.TIME_CID]
        channel = cols[column.MEDIA_CID]

        if uid != None and len(uid) != 0:
            uid_key = Uid_Fix + uid
            addItem2(U_Type, uuid, stamp, collected, uid_key)
            addItem2(Channel_Type, channel, stamp, collected, uid_key)
            addItem2(Ip_Type, ip, stamp, collected, uid_key)
        if uuid != None and len(uuid) != 0:
            uuid_key = Uuid_Fix + uuid
            addItem2(U_Type, uid, stamp, collected, uuid_key)
            addItem2(Channel_Type, channel, stamp, collected, uuid_key)
            addItem2(Ip_Type, ip, stamp, collected, uuid_key)

    for key in collected:
        output(key, collected[key])
def map():
    """Map step: group uuid/uid, channel and IP observations by user key.

    A stdin row with exactly ``Reduce_Cols`` columns whose second column is
    in ``types`` is treated as already-reduced data and printed back
    unchanged.  Any other row is used only if ``column.valid`` accepts it.

    Observations are stored with ``addItem2``: under ``Uid_Fix + uid`` (the
    row's uuid, channel, IP) when the uid is non-empty, and under
    ``Uuid_Fix + uuid`` (the row's uid, channel, IP) when the uuid is
    non-empty.  Finally ``output`` is called once per stored key.
    """
    per_user = {}
    for line in sys.stdin:
        row = line.strip().split("\t")
        if len(row) == Reduce_Cols and row[1] in types:
            # Accumulate data from earlier runs by re-emitting it.
            print(line.strip())
            continue
        if not column.valid(row):
            continue

        uid = column.uid(row)
        uuid = column.uuidOnly(row)
        ip = row[column.IP_CID]
        when = row[column.TIME_CID]
        channel = row[column.MEDIA_CID]

        has_uid = uid != None and len(uid) != 0
        has_uuid = uuid != None and len(uuid) != 0
        if has_uid:
            addItem2(U_Type, uuid, when, per_user, Uid_Fix + uid)
            addItem2(Channel_Type, channel, when, per_user, Uid_Fix + uid)
            addItem2(Ip_Type, ip, when, per_user, Uid_Fix + uid)
        if has_uuid:
            addItem2(U_Type, uid, when, per_user, Uuid_Fix + uuid)
            addItem2(Channel_Type, channel, when, per_user, Uuid_Fix + uuid)
            addItem2(Ip_Type, ip, when, per_user, Uuid_Fix + uuid)

    for user_key in per_user:
        output(user_key, per_user[user_key])