def activeUserDetail(self, datatype, num, data=None):
    """Collect active user ids from the uv file for `num` days back,
    keyed by (version, channel) with "all" roll-ups on both axes.

    Args:
        datatype: dataset identifier, forwarded to get_uvfile_path.
        num: number of days back; selects which uv file is read.
        data: optional dict to accumulate into.  A fresh dict is created
            when omitted (the original used a mutable default `data={}`,
            which is shared across calls and silently accumulates state).

    Returns:
        dict mapping (version, channel) -> set of active uids; includes
        (ver, "all"), ("all", channel) and ("all", "all") aggregates.
    """
    if data is None:
        data = {}
    uvfile_path = get_uvfile_path(num, datatype, iszip=True)
    log_count = 0
    err_count = 0
    for line in JHOpen().readLines(uvfile_path):
        if not line:
            continue
        log_count += 1
        try:
            items = line.split("\t")
            uid = items[0].strip()
            # field 1 == "1" marks an active user; skip everyone else
            if items[1].strip() != "1":
                continue
            comepub = items[2].strip()   # acquisition channel
            vers = items[5].strip()      # '#'-separated version list
            for ver in vers.split("#"):
                # exact pair plus the three roll-up buckets
                for key in ((ver, comepub), (ver, "all"),
                            ("all", comepub), ("all", "all")):
                    data.setdefault(key, set()).add(uid)
        except Exception:
            # narrowed from bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; malformed lines are counted and skipped
            import traceback
            traceback.print_exc()
            err_count += 1
    return data
def newcomers(self, datatype, num):
    """Collect uids of users who first appeared (and were active) exactly
    `num` days ago, keyed by (version, channel) with "all" roll-ups.

    Args:
        datatype: dataset identifier, forwarded to get_uvfile_path.
        num: number of days back; selects the uv file and the target day.

    Returns:
        dict mapping (version, channel) -> set of newcomer uids.
    """
    uvfile_path = get_uvfile_path(num, datatype, iszip=True)
    curDay = time.strftime("%Y%m%d", time.localtime(time.time() - 86400 * num))
    result = {}
    for line in JHOpen().readLines(uvfile_path):
        if not line:
            continue
        try:
            items = line.split("\t")
            uid = items[0].strip()
            # newcomer: first-seen timestamp (field 8, yyyymmdd prefix)
            # matches the target day
            if curDay != items[8][:8]:
                continue
            # must also be flagged active (field 1 == "1")
            if items[1].strip() != "1":
                continue
            curpub = items[3].strip()
            # "#" marks an unknown acquisition channel; fall back to the
            # first current channel
            comepub = items[2].strip() if items[2].strip() != "#" \
                else curpub.split("#")[0]
            vers = items[5].strip()
            for ver in vers.split("#"):
                # original guarded each add with `if isnewcomer`, but the
                # `continue` above already guarantees it -- dead branch
                # removed
                for key in ((ver, comepub), ("all", comepub),
                            (ver, "all"), ("all", "all")):
                    result.setdefault(key, set()).add(uid)
        except Exception:
            # narrowed from bare `except:`; bad lines are skipped
            import traceback
            traceback.print_exc()
    return result
def pipeline(self, path):
    """Yield one-element lists of transformed records read from *path*.

    A line whose transform fails yields an empty list instead of aborting
    the stream, so downstream consumers stay aligned with the input.
    """
    for line in JHOpen().readLines(path):
        if not line:
            continue
        try:
            data = self.transform.transform(line)
            yield [data]
        except Exception:
            # narrowed from bare `except:` so interpreter-exit exceptions
            # propagate; traceback goes to stderr
            import traceback
            traceback.print_exc()
            yield []
def pipeline(self, path):
    """Yield one-element lists of JSON-decoded records read from *path*.

    A line that fails to parse yields an empty list instead of aborting
    the stream, so downstream consumers stay aligned with the input.
    """
    for line in JHOpen().readLines(path):
        if not line:
            continue
        try:
            data = json.loads(line.strip())
            yield [data]
        except Exception:
            # narrowed from bare `except:` so interpreter-exit exceptions
            # propagate; traceback goes to stderr
            import traceback
            traceback.print_exc()
            yield []
def activeUser(self, datatype, num):
    """Return the set of uids flagged active in the uv file `num` days back.

    Args:
        datatype: dataset identifier, forwarded to get_uvfile_path.
        num: number of days back; selects which uv file is read.
    """
    users = set()
    uvfile_path = get_uvfile_path(num, datatype, iszip=True)
    for line in JHOpen().readLines(uvfile_path):
        if not line:
            continue
        try:
            items = line.split("\t")
            uid = items[0].strip()
            # field 1 == "1" marks an active user
            if items[1].strip() != "1":
                continue
            users.add(uid)
        except Exception:
            # narrowed from bare `except:`; malformed lines are skipped
            import traceback
            traceback.print_exc()
    return users
def getData(self, datatype, num):
    """Collect the distinct event ids of "ac" records logged `num` days ago.

    Reads either self.daily_paths (when set) or the paths returned by
    get_file_path for the target day.

    Returns:
        dict mapping op type -> set of event ids; only "ac" records are
        kept, so the result is {"ac": {...}} or empty.
    """
    curDay = time.strftime("%Y%m%d", time.localtime(time.time() - 86400 * num))
    paths = self.daily_paths if self.daily_paths else get_file_path(
        datatype=datatype, yyyymmdd=curDay, hhmm="2359", last=1440)
    result = {}
    for path in paths:
        for line in JHOpen().readLines(path):
            if not line:
                continue
            try:
                data = json.loads(line)
                optype = data["type"].strip()
                if optype != "ac":
                    continue
                eventid = data["event"].strip()
                result.setdefault(optype, set()).add(eventid)
            except Exception:
                # narrowed from bare `except:`; log the offending line
                # for debugging and keep going
                import traceback
                traceback.print_exc()
                print(line)
    return result
def pipline(self, path):
    """Pass-through pipeline: wrap every non-empty line of *path* in a
    one-element list.

    NOTE(review): the name is spelled "pipline" while sibling classes use
    "pipeline" -- kept as-is for caller compatibility.
    """
    nonempty_lines = (ln for ln in JHOpen().readLines(path) if ln)
    for ln in nonempty_lines:
        yield [ln]
def pipeline(self, path):
    """Yield one-element lists of JSON-decoded records read from *path*.

    Brought in line with the sibling pipeline implementations in this
    file: a line that fails to parse yields an empty list instead of
    raising and killing the whole generator.
    """
    for line in JHOpen().readLines(path):
        if not line:
            continue
        try:
            data = json.loads(line)
            yield [data]
        except Exception:
            import traceback
            traceback.print_exc()
            yield []
def rules(self, analysisresult, data, num, *args, **kwargs):
    """For each user who became a newcomer (num+7) days ago, compute how
    many of the following 7 days they were active, and bucket the counts
    into analysisresult.result keyed by (version, channel).

    Each result value is [ {active_days: set(uid)}, newcomer_count ].
    Sets self.finished = True on completion (or failure) and always
    returns False.

    NOTE(review): `data` and *args are unused; kwargs must contain
    "datatype".  Both excepts are bare and `print(traceback.print_exc())`
    prints None (print_exc writes to stderr and returns None) -- left
    untouched here, flagged for a behavioral fix.
    """
    num += 7
    result = analysisresult.result
    datatype = kwargs["datatype"]
    curDay = time.strftime("%Y%m%d", time.localtime(time.time() - 86400 * num))
    try:
        tmp = {}
        newuserData = self.newcomers(datatype, num)
        newusers = newuserData.get(("all", "all"), set())
        # Count, for every newcomer, the number of active days in the
        # 7-day window after the newcomer day (j runs 7 down to 1 days
        # back).  tmp maps uid -> [active_day_count].
        for i in range(num - 7, num):
            j = num - i
            activeUser = self.activeUser(datatype, j)
            for uid in newusers:
                tmp.setdefault(uid, [0])
                if uid in activeUser:
                    tmp.setdefault(uid, [0])[0] += 1
        # Re-read the newcomer-day uv file to attribute each newcomer's
        # active-day count per version and per channel.
        uvfile_path = get_uvfile_path(num, datatype, iszip=True)
        for line in JHOpen().readLines(uvfile_path):
            if not line:
                continue
            try:
                items = line.split("\t")
                uid = items[0].strip()
                # newcomer: first-seen timestamp (field 8) is curDay
                isnewcomer = True if curDay == items[8][:8] else False
                if not isnewcomer:
                    continue
                curpub = items[3].strip()
                # "#" marks an unknown channel; fall back to the first
                # current channel
                comepub = items[2].strip(
                ) if items[2].strip() != "#" else curpub.split("#")[0]
                vers = items[5].strip()
                # is_allver dedups the ("all", ...) counters so a user on
                # several versions is only counted once in the roll-ups
                is_allver = True
                for ver in vers.split("#"):
                    days = tmp.get(uid, [0])[0]
                    # bucket the uid under its active-day count for the
                    # exact pair and all three roll-ups (sets dedup uids)
                    result.setdefault(
                        (ver, comepub), [{}, 0])[0].setdefault(days, set()).add(uid)
                    result.setdefault(
                        (ver, "all"), [{}, 0])[0].setdefault(days, set()).add(uid)
                    result.setdefault(
                        ("all", comepub), [{}, 0])[0].setdefault(days, set()).add(uid)
                    result.setdefault(
                        ("all", "all"), [{}, 0])[0].setdefault(days, set()).add(uid)
                    # newcomer counters (original comment: "新增" = "new adds")
                    result.setdefault((ver, comepub), [{}, 0])[1] += 1
                    result.setdefault((ver, "all"), [{}, 0])[1] += 1
                    result.setdefault(("all", comepub), [{}, 0])[1] += 1 if is_allver else 0
                    result.setdefault(("all", "all"), [{}, 0])[1] += 1 if is_allver else 0
                    is_allver = False
            except:
                import traceback
                print(traceback.print_exc(), line)
        self.finished = True
    except:
        import traceback
        print(traceback.print_exc())
        self.finished = True
    return False
# --coding=utf8--
import os
import os.path as path
from SaaSCommon.JHOpen import JHOpen
import json
from SaaSMode.EventDetailH5 import H5EventDetail
import datetime
from DBClient.PostgreSqlClient import PostgreSqlClient

if __name__ == "__main__":
    # Read line-delimited JSON events from the "data" file next to this
    # script and hydrate one H5EventDetail per line.
    dataPath = os.sep.join([path.dirname(path.abspath(__file__)), "data"])
    # JHOpen.readLines is invoked as an instance method everywhere else in
    # this project; the original called it on the class, which would pass
    # dataPath as `self`.  Instantiate first.
    inputStream = JHOpen().readLines(dataPath)
    data = []
    for line in inputStream:
        item = dict(json.loads(line))
        temp = H5EventDetail()
        item_type = item.get("type")  # renamed from `type`: shadowed builtin
        if item_type == "page":
            temp.support = item.get("support")
            temp.usermap = item.get("usermap")
            temp.ref = item.get("ref")
        elif item_type == "dur":
            temp.status = item.get("status")
            temp.value = item.get("value")
        elif item_type == "ac":
            temp.event = item.get("event")
            temp.appkey = item.get("appkey")
            temp.type = item.get("type")
            temp.uri = item.get("uri")
            temp.uid = item.get("uid")
            # event timestamps arrive in milliseconds; fromtimestamp
            # expects seconds
            temp.opatime = datetime.datetime.fromtimestamp(item.get("ts") / 1000)