def activeUserDetail(self, datatype, num, data=None):
    """Collect the ids of active users from one uvfile, bucketed by key.

    Each non-empty line of the uvfile is split on tabs. Column 0 is read as
    the user id, column 1 as the active flag (only the value "1" counts as
    active), column 2 as ``comepub`` and column 5 as a "#"-separated list of
    versions. For every version of an active user the id is added to four
    buckets: ``(ver, comepub)``, ``(ver, "all")``, ``("all", comepub)`` and
    ``("all", "all")``.

    Args:
        datatype: Passed through to ``get_uvfile_path``.
        num: Passed through to ``get_uvfile_path``.
        data: Optional dict to accumulate into. It is updated in place, so a
            caller can merge several files into one mapping. A fresh dict is
            created when omitted.

    Returns:
        The ``data`` dict, mapping ``(ver, comepub)`` tuples to sets of ids.
    """
    # A literal ``{}`` default would be created once and shared by every call
    # that omits ``data``, silently mixing results of unrelated calls.
    if data is None:
        data = {}
    uvfile_path = get_uvfile_path(num, datatype, iszip=True)
    for line in JHOpen().readLines(uvfile_path):
        if not line:
            continue
        try:
            items = line.split("\t")
            uid = items[0].strip()
            if items[1].strip() != "1":
                continue
            comepub = items[2].strip()
            for ver in items[5].strip().split("#"):
                buckets = (
                    (ver, comepub),
                    (ver, "all"),
                    ("all", comepub),
                    ("all", "all"),
                )
                for bucket in buckets:
                    data.setdefault(bucket, set()).add(uid)
        except Exception:
            # Best effort: a malformed line is reported and skipped.
            # ``Exception`` (not a bare except) keeps Ctrl-C / SystemExit
            # working while the file is being scanned.
            import traceback
            traceback.print_exc()
    return data
示例#2
0
    def paths(self, **kwargs):
        """Resolve the log paths described by ``kwargs`` into PathProperty objects.

        ``kwargs["logtype"]`` selects the behaviour:

        * ``"logfile"`` - every path returned by ``get_file_path(**kwargs)``
          is wrapped with ``pathtype="logfile"``.
        * ``"uvfile"`` - the single path returned by ``get_uvfile_path`` for
          ``num`` / ``datatype`` (and the optional ``iszip``, default True)
          is wrapped with ``pathtype="uvfile"``.
        * a list - each entry is resolved recursively with the remaining
          keyword arguments unchanged, and the results are concatenated in
          order.

        Any other value falls through and yields ``None``.
        """
        logtype = kwargs["logtype"]

        if logtype == "logfile":
            return [
                PathProperty(_path=found, pathtype="logfile")
                for found in get_file_path(**kwargs)
            ]

        if logtype == "uvfile":
            num = kwargs["num"]
            datatype = kwargs["datatype"]
            use_zip = kwargs.get("iszip", True)
            uv_path = get_uvfile_path(num, datatype, iszip=use_zip)
            return [PathProperty(_path=uv_path, pathtype="uvfile")]

        if not isinstance(logtype, list):
            return None

        # Several log types requested at once: resolve them one by one.
        collected = []
        for single_type in logtype:
            kwargs = dict(kwargs, logtype=single_type)
            collected.extend(self.paths(**kwargs))
        return collected
示例#3
0
 def newcomers(self, datatype, num):
     """Collect ids of users who are both new on the target day and active.

     The target day is "now minus ``num`` days" in local time, formatted as
     ``YYYYMMDD``. Each non-empty uvfile line is split on tabs; a user is kept
     when the first 8 characters of column 8 equal the target day and
     column 1 is "1". ``comepub`` is column 2, or, when that column is "#",
     the first "#"-separated entry of column 3. Column 5 is a "#"-separated
     version list.

     Args:
         datatype: Passed through to ``get_uvfile_path``.
         num: Day offset; also passed through to ``get_uvfile_path``.

     Returns:
         dict mapping ``(ver, comepub)``, ``(ver, "all")``,
         ``("all", comepub)`` and ``("all", "all")`` to sets of user ids.
     """
     uvfile_path = get_uvfile_path(num, datatype, iszip=True)
     curDay = time.strftime("%Y%m%d",
                            time.localtime(time.time() - 86400 * num))
     result = {}
     for line in JHOpen().readLines(uvfile_path):
         if not line:
             continue
         try:
             items = line.split("\t")
             uid = items[0].strip()
             if items[8][:8] != curDay:
                 continue
             if items[1].strip() != "1":
                 continue
             curpub = items[3].strip()
             comepub = items[2].strip()
             if comepub == "#":
                 # No recorded comepub: fall back to the first current one.
                 comepub = curpub.split("#")[0]
             for ver in items[5].strip().split("#"):
                 buckets = (
                     (ver, comepub),
                     ("all", comepub),
                     (ver, "all"),
                     ("all", "all"),
                 )
                 for bucket in buckets:
                     result.setdefault(bucket, set()).add(uid)
         except Exception:
             # Best effort: report the malformed line and keep scanning.
             # ``Exception`` rather than a bare except so Ctrl-C still works.
             import traceback
             traceback.print_exc()
     return result
示例#4
0
 def activeUser(self, datatype, num):
     """Return the set of user ids flagged active in one uvfile.

     Each non-empty line is split on tabs; column 0 is the user id and the
     user counts as active only when column 1 (stripped) equals "1".

     Args:
         datatype: Passed through to ``get_uvfile_path``.
         num: Passed through to ``get_uvfile_path``.

     Returns:
         set of stripped user ids.
     """
     users = set()
     uvfile_path = get_uvfile_path(num, datatype, iszip=True)
     for line in JHOpen().readLines(uvfile_path):
         if not line:
             continue
         try:
             items = line.split("\t")
             if items[1].strip() == "1":
                 users.add(items[0].strip())
         except Exception:
             # Best effort: report the malformed line and keep scanning.
             # ``Exception`` rather than a bare except so Ctrl-C still works.
             import traceback
             traceback.print_exc()
     return users
示例#5
0
 def rules(self, analysisresult, data, num, *args, **kwargs):
     """Bucket the new users of a past day by how many days they were active.

     ``num`` is shifted by 7, and the users that ``self.newcomers`` reports
     for that shifted day are looked up in ``self.activeUser`` for day
     offsets 7 down to 1. The uvfile of the shifted day is then scanned and
     ``analysisresult.result`` is updated in place. Every key
     ``(ver, comepub)``, ``(ver, "all")``, ``("all", comepub)`` and
     ``("all", "all")`` maps to a two-item list:

     * index 0: dict ``active_day_count -> set(uid)``
     * index 1: number of new-user entries counted for that key. The two
       ``"all"``-version keys are incremented once per line, the
       per-version keys once per version.

     Args:
         analysisresult: Object whose ``result`` dict is filled in.
         data: Unused here.
         num: Day offset before the 7-day shift.
         **kwargs: Must contain ``datatype``.

     Returns:
         ``None`` on success, ``False`` when the computation was aborted by
         an error. ``self.finished`` is set to True in both cases.
     """
     import traceback

     num += 7
     result = analysisresult.result
     datatype = kwargs["datatype"]
     curDay = time.strftime("%Y%m%d",
                            time.localtime(time.time() - 86400 * num))
     try:
         newusers = self.newcomers(datatype, num).get(("all", "all"), set())
         # Number of days each new user was active.
         active_days = dict.fromkeys(newusers, 0)
         # NOTE(review): the offsets are always 7..1 regardless of the
         # incoming ``num``, which lines up with the cohort day only when the
         # caller passes num == 1 - confirm this is intended.
         for offset in range(7, 0, -1):
             active_that_day = self.activeUser(datatype, offset)
             for uid in newusers:
                 if uid in active_that_day:
                     active_days[uid] += 1
         # Break the counts down by version and comepub.
         uvfile_path = get_uvfile_path(num, datatype, iszip=True)
         for line in JHOpen().readLines(uvfile_path):
             if not line:
                 continue
             try:
                 items = line.split("\t")
                 uid = items[0].strip()
                 if items[8][:8] != curDay:
                     continue
                 curpub = items[3].strip()
                 comepub = items[2].strip()
                 if comepub == "#":
                     comepub = curpub.split("#")[0]
                 days = active_days.get(uid, 0)
                 # True only for the first version, so the "all"-version
                 # totals count each line once.
                 is_allver = True
                 for ver in items[5].strip().split("#"):
                     buckets = (
                         (ver, comepub),
                         (ver, "all"),
                         ("all", comepub),
                         ("all", "all"),
                     )
                     for bucket in buckets:
                         entry = result.setdefault(bucket, [{}, 0])
                         entry[0].setdefault(days, set()).add(uid)
                     # New-user totals.
                     result[(ver, comepub)][1] += 1
                     result[(ver, "all")][1] += 1
                     if is_allver:
                         result[("all", comepub)][1] += 1
                         result[("all", "all")][1] += 1
                         is_allver = False
             except Exception:
                 # Best effort: report the offending line and keep scanning.
                 traceback.print_exc()
                 print(line)
     except Exception:
         traceback.print_exc()
         return False
     finally:
         # Signal completion on every exit path, including errors.
         self.finished = True
示例#6
0
 def write(self, dataDict, appkey, modename, modetools, *args, **kwargs):
     """Enrich per-user records from MongoDB and write them out as a uvfile.

     Step 1: records are fetched through ``client.findElemIn`` from the
     "uvfile" collection of ``appkey`` for the day "now minus ``num`` days"
     (``YYYY-MM-DD``, local time). For every returned user that exists in
     ``dataDict`` the entry gets ``firstLoginTime``, ``lastLoginTime`` (from
     ``lastOpaTime``), ``comepub`` (first element of ``jhd_pb``) and, when
     ``jhd_loc`` is non-empty, ``locs`` as a de-duplicated list of
     ``(prov, city)`` tuples. Missing fields default to "#".

     Step 2: one tab-separated line per ``dataDict`` entry is handed to
     ``JHWrite`` in this column order: uid, isactive, comepub, curpub, plat,
     ver, ua, net, firstLoginTime, lastLoginTime, loc, in, dur, actions
     (JSON), pages (JSON), pushid. ``JHWrite.finished(iszip=True)`` is
     called at the end.

     Args:
         dataDict: dict ``uid -> per-user dict``; updated in place.
         appkey: Used as Mongo database/app name and for the output path.
         modename: Unused here.
         modetools: Unused here.
         **kwargs: Must contain ``num`` (day offset).
     """
     num = kwargs["num"]
     client = PyMongoClient(self.mongo_id)
     uvfile_path = get_uvfile_path(num, appkey)
     cur_day = time.strftime("%Y-%m-%d",
                             time.localtime(time.time() - 86400 * num))
     # A real list (not a dict view) so it can be used as a query argument.
     uids = list(dataDict.keys())
     uvfile = client.findElemIn(
         appkey, "uvfile", "jhd_userkey", uids,
         OrderedDict([("tm", cur_day)]), {
             "_id": False,
             "jhd_userkey": True,
             "jhd_loc": True,
             "firstLoginTime": True,
             "lastOpaTime": True,
             "jhd_pb": True
         })
     for item in uvfile:
         try:
             uid = item["jhd_userkey"]
             comepub = item.get("jhd_pb", ["#"])
             data = dataDict[uid]
             data["firstLoginTime"] = item.get("firstLoginTime", "#")
             data["lastLoginTime"] = item.get("lastOpaTime", "#")
             data["comepub"] = comepub[0] if comepub else "#"
             locs = item.get("jhd_loc", None)
             if locs:
                 data["locs"] = []
                 for loc in locs:
                     # Locations come either as dicts or as "prov_city".
                     if isinstance(loc, dict):
                         prov = loc.get("prov", "#")
                         city = loc.get("city", "#")
                     else:
                         prov, city = loc.split("_")
                     if (prov, city) not in data["locs"]:
                         data["locs"].append((prov, city))
         except Exception:
             # Best effort: report the bad record and continue with the rest.
             import traceback
             traceback.print_exc()
     # NOTE(review): the lookups below assume every entry already carries
     # "comepub", "firstLoginTime", "lastLoginTime" and "locs", even for
     # users the query above did not return - confirm against the caller.
     for uid in dataDict:
         data = dataDict[uid]
         pushid = data["pushid"]
         plat = data["plat"]
         ua = data["ua"]
         net = "#".join(data["net"])
         curpub = "#".join(data["curpubs"])
         comepub = data["comepub"]
         firstLoginTime = data["firstLoginTime"]
         lastLoginTime = data["lastLoginTime"]
         ver = "#".join(data["vers"])
         loc = "#".join("_".join(pair) for pair in data["locs"])
         in_num = data["in"][0]
         dur = "#".join(map(str, data["end"])) if data["end"] else "#"
         # Only the first element of each action/page entry is exported.
         actions = {name: data["action"][name][0] for name in data["action"]}
         actionDict = json.dumps(actions)
         pages = {name: data["page"][name][0] for name in data["page"]}
         pageDict = json.dumps(pages)
         isactive = data["isactive"]
         fields = [
             uid,  # 1
             isactive,
             comepub,
             curpub,
             plat,  # 5
             ver,
             ua,
             net,
             firstLoginTime,
             lastLoginTime,  # 10
             loc,
             in_num,
             dur,
             actionDict,
             pageDict,  # 15
             pushid,  # 16
         ]
         JHWrite(uvfile_path, "\t".join(map(str, fields)))
     JHWrite.finished(iszip=True)