Пример #1
0
class UserProfileUpdateWriter(ModeWriter):
    """Add resolved IP locations to the ``locs`` array of user profiles.

    For each user in the incoming batch, every IP listed under ``jhd_ip`` is
    looked up in the caller-supplied ``ip_loc`` mapping and the resulting
    ``{"prov": ..., "city": ...}`` dict is pushed onto the ``locs`` array of
    the matching ``UserProfile`` document with ``$addToSet``.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Replace the client and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """No-op; this writer never removes anything."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Push the locations of each user's IPs into ``UserProfile.locs``.

        :param data: dict keyed by user id; each value is a dict that may
            carry a ``jhd_ip`` iterable. A ``locs`` list is added to (or
            extended on) each value as a side effect.
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :param kwargs: must contain ``ip_loc``, a dict mapping an IP to a
            dict with optional ``prov`` / ``city`` keys.
        :raises KeyError: if ``ip_loc`` is missing from ``kwargs``.
        """
        import traceback

        modename = self.modename
        started = time.time()
        ip_loc = kwargs["ip_loc"]

        # Phase 1: attach the resolved locations to each user's record.
        for uid in data:
            # A user without any recorded IP must not abort the whole batch.
            for ip in data[uid].get("jhd_ip") or []:
                loc = ip_loc.get(ip, {})
                if not loc:
                    continue
                loc_data = {
                    "prov": loc.get("prov", "#"),
                    "city": loc.get("city", "#"),
                }
                data[uid].setdefault("locs", []).append(loc_data)

        # Phase 2: one $addToSet update per (user, location) pair.
        op = []
        for uid in data:
            for loc in data[uid].get("locs", []):
                op.append(UpdateOne({"_id": uid},
                                    {"$addToSet": {"locs": loc}}))

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            # Retry the very same partial updates one at a time. The previous
            # fallback handed ``data`` to ``replace_onebyone``, which
            # (judging by its name and its other callers -- TODO confirm)
            # replaces whole documents and would overwrite profiles with the
            # batch records instead of appending locations.
            for single_op in op:
                try:
                    self.client.bulkWrite(appkey, modename, [single_op])
                except Exception:
                    # print_exc() already writes the traceback; wrapping it
                    # in print() only added a stray "None".
                    traceback.print_exc()
        print("UserProfileUpdateWriter cost seconds %.3f" %
              ((time.time() - started), ))
Пример #2
0
class UserDefineMapMeta(object):
    """Read per-event custom-field metadata from the ``UserMapMeta`` collection."""

    # Candidate values longer than this are dropped.
    _MAX_ELEM_LEN = 60
    # At most this many candidate values are kept per field.
    _MAX_ELEMS = 100

    def __init__(self):
        pass

    def get_mongoid(self, appkey):
        """Look up the Mongo id of *appkey* and store it on ``self.mongo_id``.

        The MySQL client is closed even when the lookup raises.
        """
        m_client = MysqlClient()
        try:
            self.mongo_id = m_client.get_mongoid(appkey)[0]
        finally:
            m_client.closeMysql()

    def getData(self, appkey, *args, **kwargs):
        """Return ``{eventid: {field_name: {"type": ..., "elems": [...]}}}``.

        Each stored document is expected to look like::

            {"_id": "ac7",
             "fields": [{"type": "string", "name": "type"}],
             "field_elems": {"type": ["category view"]}}

        String values that contain ``"http"`` or are 60+ characters long are
        skipped, and at most 100 values are kept per field. Events whose
        ``fields`` list is empty map to an empty dict. A malformed field
        entry is skipped without affecting the others.

        :param appkey: application key; used both for the Mongo id lookup
            and as the database name.
        :raises KeyError: if a document has no ``fields`` key.
        """
        modename = "UserMapMeta"
        self.get_mongoid(appkey)
        self.client = PyMongoClient(self.mongo_id)
        cur = self.client.getConn()[appkey][modename].find({})

        # ``unicode`` only exists on Python 2; referencing it directly on
        # Python 3 raised NameError, which the old bare ``except`` swallowed,
        # silently dropping any field that had a non-str value.
        try:
            text_types = (str, unicode)
        except NameError:
            text_types = (str,)

        result = {}
        for item in cur:
            eventid = item["_id"]
            fields = item["fields"]
            field_elems = item.get("field_elems", {})
            # Record the type and candidate values of every field.
            for field in fields:
                try:
                    mapkey = field["name"]
                    mapkey_type = field["type"]
                    elems_tmp = []
                    for elem in field_elems.get(mapkey, []):
                        if isinstance(elem, text_types):
                            # Exclude URL-like values.
                            if "http" in elem:
                                continue
                            if len(elem) >= self._MAX_ELEM_LEN:
                                continue
                        elems_tmp.append(elem)
                        if len(elems_tmp) >= self._MAX_ELEMS:
                            break
                    result.setdefault(eventid, {}).setdefault(
                        mapkey, {
                            "type": mapkey_type,
                            "elems": elems_tmp,
                        })
                except Exception:
                    # Best effort: skip the malformed field, keep the rest.
                    continue
            # Event without any custom field.
            if not fields:
                result.setdefault(eventid, {})
        return result
Пример #3
0
class IPStorageMG(IPStorage):
    """IP storage that writes into the ``UserIP`` collection of database ``jh``."""

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()

    def store(self, _id, **kwargs):
        """``$set`` the given fields on the document with id *_id*.

        The update is issued with ``upsert=False``, so a missing document is
        not created. With no fields there is nothing to set and no request is
        sent.
        """
        if not kwargs:
            return
        op = UpdateOne({"_id": _id}, {"$set": dict(kwargs)}, upsert=False)
        self.client.bulkWrite("jh", "UserIP", [op])

    def storeItem(self, _id, key, value):
        """Not implemented; does nothing."""
        pass

    def bulkStore(self, data):
        """``$set`` many documents at once.

        :param data: dict mapping a document id to a dict of fields to set.
            Entries with no fields are skipped, and nothing is sent when no
            operation remains (matching the ``if op:`` guard used by the
            other writers before calling ``bulkWrite``).
        """
        op = [
            UpdateOne({"_id": _id}, {"$set": dict(fields)}, upsert=False)
            for _id, fields in data.items()
            if fields
        ]
        if op:
            self.client.bulkWrite("jh", "UserIP", op)
Пример #4
0
class ModeWriteMongoUserIP(ModeWriter):
    """Back-fill ``jhd_loc`` on ``uvfile`` documents from ``jh.UserIP``."""

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.dbname = "jh"
        self.modename = "UserIP"

    def remove(self, *args, **kwargs):
        """No-op; this writer never removes anything."""
        pass

    def write(self, *args, **kwargs):
        '''
        Add the location of recently stamped IPs to today's ``uvfile`` records.

        Reads the ``UserIP`` documents whose ``timestamp`` is within the last
        100 seconds and that have ``province``, ``city`` and ``appkey``
        fields. For every appkey listed on such a document, the ``uvfile``
        record of ``today`` that has this IP and no ``jhd_loc`` yet gets
        ``{"prov", "city"}`` added to ``jhd_loc``.

        :param args: reserved, unused
        :param kwargs: today = current day (yyyy-mm-dd)
        :return: None
        '''
        today = kwargs["today"]
        query = {
            "timestamp": {"$gte": time.time() - 100},
            "province": {"$exists": True},
            "city": {"$exists": True},
            "appkey": {"$exists": True},
        }
        projection = {"province": True, "city": True, "appkey": True}
        cur = self.conn[self.dbname][self.modename].find(query, projection)

        update_appkey = {}
        for item in cur:
            ip = item["_id"]
            province = item["province"]
            city = item["city"]
            if not province:
                continue
            if not city:
                # Fall back to the province when the city is empty.
                city = province
            location = {"prov": province, "city": city}
            for a_appkey in item["appkey"]:
                # Key order is kept explicit; the filter is meant to be
                # backed by an index to keep the updates fast.
                # The operator was misspelled "$exista", which is not a
                # MongoDB query operator.
                target = OrderedDict([("tm", today),
                                      ("jhd_loc", {"$exists": False}),
                                      ("jhd_ip", ip)])
                update_appkey.setdefault(a_appkey, []).append(
                    UpdateOne(target, {"$addToSet": {"jhd_loc": location}}))

        for a_appkey in update_appkey:
            a = time.time()
            self.client.bulkWrite(a_appkey, "uvfile", update_appkey[a_appkey])
            print(__name__,
                  time.time() - a, a_appkey, len(update_appkey[a_appkey]))
Пример #5
0
class UserActiveWriter(ModeWriter):
    """Build the per-day ``UserActive`` record of every known user."""

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActive"

    def setClient(self, client):
        """Replace the client used for all reads and writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete the ``UserActive`` records of day *tm*.

        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``.
        :raises ValueError: if *tm* is not a valid date.
        """
        modename = self.modename
        # Parse and re-format to validate and normalise the date string.
        tm = time.strftime("%Y%m%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"partition_date": tm})

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Write one ``UserActive`` document per ``UserProfile`` for the day.

        For every profile, yesterday's ``UserActive`` record (if any) is
        fetched, its ``active`` list is extended with 1 or 0 depending on
        whether the day's offset from the first login is in the profile's
        ``activelife``, and the result is upserted under today's
        ``partition_date``.

        :param data: unused here.
        :param appkey: database name.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :param kwargs: optional ``today`` (``yyyy-mm-dd``); defaults to the
            day 86400 seconds before now.
        """
        import traceback

        started = time.time()
        modename = self.modename
        if "today" in kwargs:
            curDay = kwargs["today"].replace("-", "")
        else:
            curDay = time.strftime("%Y%m%d",
                                   time.localtime(time.time() - 86400))
        conn = self.client.getConn()
        userActiveCollection = conn[appkey][modename]
        docs = self.client.find(appkey, "UserProfile", {})
        yesterday = getDay(curDay, "%Y%m%d", -1)

        op = []
        # Timed separately so the total reported at the end still covers the
        # whole method (the single timer used to be reset here).
        build_started = time.time()
        for doc in docs:
            try:
                key = doc["_id"]
                activelife = doc.get("activelife", [0])
                firstLoginTime = doc["firstLoginTime"][:8]
                login = getDayDelta(curDay, firstLoginTime) in activelife
                # Look up the previous day's record; meant to be backed by
                # db.UserActive.ensureIndex({partition_date: -1, jh_uid: 1}).
                userActive = userActiveCollection.find_one({
                    "jh_uid": key,
                    "partition_date": yesterday,
                })
                # Build today's record.
                newUserActive = UserActiveBuilder()
                newUserActive.setJhdUid(key)
                newUserActive.setPartitionDate(curDay)
                if userActive is None:
                    newUserActive.setActive([1] if login else [0])
                else:
                    userActive["active"].append(1 if login else 0)
                    newUserActive.setActive(userActive["active"])
                newUserActive.setFirstLoginTime(doc["firstLoginTime"])
                newUserActive.setLastLoginTime(doc["lastLoginTime"])
                op.append(
                    ReplaceOne({"jh_uid": key, "partition_date": curDay},
                               newUserActive.builder(),
                               upsert=True))
            except Exception:
                # One bad profile must not stop the others. print_exc()
                # writes the traceback itself and returns None.
                traceback.print_exc()
                print(doc)
        print("find cost time: %d" % int(time.time() - build_started))

        try:
            if op:
                userActiveCollection.bulk_write(op)
        except Exception:
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    userActiveCollection.bulk_write([op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserActiveWriter cost seconds %.10f" %
              ((time.time() - started), ))
Пример #6
0
class UserProfileWriter(ModeWriter):
    """Merge a batch of user records into the ``UserProfile`` collection."""

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Replace the client and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """No-op; this writer never removes anything."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge *data* with the stored profiles and upsert the result.

        Existing users are merged through ``modetools.mergeUserProfile`` and
        get their ``activelife`` (day offsets from the first login) and
        ``activelifeabs`` (day offsets from 2016-01-01) refreshed. Users not
        found in the collection get ``comever`` set from ``ver`` and their
        ``activelifeabs`` computed. Every record is then written with
        ``ReplaceOne(..., upsert=True)``.

        :param data: dict keyed by user id (the ``UserProfile`` ``_id``);
            values are mutated / replaced in place.
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: object providing ``mergeUserProfile(new, old)``.
        :raises KeyError: if a new user's record has no ``ver`` key.
        """
        import traceback

        started = time.time()
        modename = self.modename
        # A real list: a Python 3 dict view is not accepted as an "$in" value.
        uids = list(data)

        docs = self.client.find(appkey, modename, {"_id": {"$in": uids}})

        oldusers = set()
        for doc in docs:
            try:
                # The UserProfile _id is the user key.
                key = doc["_id"]
                oldusers.add(key)
                incoming = data[key]

                # Day of the incoming record's last login. Reset for every
                # user: previously the value of the preceding user leaked in
                # (or the name was unbound) when the field was missing.
                incoming_last_day = None
                if "lastLoginTime" in incoming:
                    incoming_last_day = incoming["lastLoginTime"][:8]

                # When the incoming data predates the stored first login,
                # shift the stored offsets so they stay relative to the new,
                # earlier first login day.
                if "firstLoginTime" in doc and "firstLoginTime" in incoming:
                    activelife = doc.get("activelife", [0])
                    firstLoginTime_new = incoming["firstLoginTime"][:8]
                    firstLoginTime_old = doc["firstLoginTime"][:8]
                    if firstLoginTime_new < firstLoginTime_old:
                        firstLoginDelta = getDayDelta(firstLoginTime_old,
                                                      firstLoginTime_new)
                        # A list, not map(): on Python 3 map() is a lazy
                        # iterator that cannot be stored or sorted.
                        doc["activelife"] = [
                            i + firstLoginDelta for i in activelife
                        ]

                data[key] = modetools.mergeUserProfile(incoming, doc)
                merged = data[key]

                # Refresh the user's lifecycle data.
                firstLoginDay = merged["firstLoginTime"][:8]
                lastLoginDay = merged["lastLoginTime"][:8]
                if incoming_last_day is None:
                    # No last login in the incoming record: use the merged one.
                    incoming_last_day = lastLoginDay
                dayDelta = getDayDelta(incoming_last_day, firstLoginDay)
                merged.setdefault("activelife", [0])  # legacy documents
                if dayDelta not in merged["activelife"]:
                    merged["activelife"].append(dayDelta)
                merged["activelife"].sort()

                # Absolute activity offsets, counted from 2016-01-01.
                try:
                    firstlogin_deltaday = getDayDelta(firstLoginDay,
                                                      "20160101")
                    merged["activelifeabs"] = [
                        firstlogin_deltaday + remain_day
                        for remain_day in merged["activelife"]
                    ]
                except Exception:
                    traceback.print_exc()
            except Exception:
                # Best effort: a broken record must not stop the batch.
                traceback.print_exc()

        # Users seen for the first time: record the version they arrived with.
        for key in set(uids) - oldusers:
            data[key]["comever"] = data[key]["ver"]
            # Absolute activity offsets, counted from 2016-01-01.
            try:
                # NOTE(review): the first login day is taken from
                # "lastLoginTime" here -- confirm this is intended.
                firstLoginDay = data[key]["lastLoginTime"][:8]
                firstlogin_deltaday = getDayDelta(firstLoginDay, "20160101")
                data[key]["activelifeabs"] = [
                    firstlogin_deltaday + remain_day
                    for remain_day in data[key]["activelife"]
                ]
            except Exception:
                traceback.print_exc()

        op = [ReplaceOne({"_id": key}, data[key], True) for key in data]
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserProfileWriter cost seconds %.3f" %
              ((time.time() - started), ))
Пример #7
0
class UserCrumbsWriter(ModeWriter):
    """Merge a batch of per-user daily records into the ``uvfile`` collection."""

    def __init__(self, mongo_id=1):
        import traceback

        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "uvfile"
        self.attachmode_storers = []
        try:
            self.attachmode_storers = [UserIP()]
        except Exception:
            # Attached storers are optional; keep the empty list on failure.
            traceback.print_exc()

    def setClient(self, client):
        """Replace the client and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Delete the ``uvfile`` records of day *tm*.

        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``; the records are
            matched on the ``yyyy-mm-dd`` form.
        :raises ValueError: if *tm* is not a valid date.
        """
        modename = self.modename
        tm = time.strftime("%Y-%m-%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"tm": tm})

    def getMeasure(self, activelifeabs, fix_deltaday):
        """Count the active days inside the trailing 7/14/28/30-day windows.

        :param activelifeabs: iterable of absolute day offsets on which the
            user was active.
        :param fix_deltaday: offset of the reference day; later offsets are
            ignored and the reference day itself is part of every window.
        :return: dict with ``last7ActiveNum``, ``last14ActiveNum``,
            ``last28ActiveNum`` and ``last30ActiveNum`` counters.
        """
        windows = (
            ("last7ActiveNum", 6),
            ("last14ActiveNum", 13),
            ("last28ActiveNum", 27),
            ("last30ActiveNum", 29),
        )
        measure = dict((name, 0) for name, _ in windows)
        for active_day in activelifeabs:
            if active_day > fix_deltaday:
                continue
            delta = fix_deltaday - active_day
            for name, max_delta in windows:
                if delta <= max_delta:
                    measure[name] += 1
        return measure

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge *data* with today's stored records and upsert the result.

        Steps: load today's ``uvfile`` records and the ``UserProfile``
        documents of the batch's users, resolve the batch's IPs through
        ``jh.UserIP``, merge stored records into *data* (adding ``jhd_loc``),
        attach ``firstLoginTime`` and the activity ``measure`` from the
        profiles, write everything with ``ReplaceOne(..., upsert=True)`` and
        finally call ``self.store_attachmode`` with ``ip_loc`` added to
        ``kwargs``.

        :param data: dict keyed by user key; values are replaced in place.
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: object providing ``mergeUserCrumbs`` and
            ``formatList``.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd`` or
            ``yyyymmdd``).
        :raises KeyError: if ``today`` is missing from ``kwargs``.
        :raises ValueError: if ``today`` is not a valid date.
        """
        import traceback

        modename = self.modename
        today = kwargs["today"].replace("-", "")
        # A real list: a Python 3 dict view is not accepted as an "$in" value.
        uids = list(data)
        # Dashed form of today. Formatting the parsed date directly replaces
        # the old subtract-then-add 86400 seconds round trip, which could
        # land on the wrong day around a DST change.
        yyyy_mm_dd = time.strftime("%Y-%m-%d",
                                   time.strptime(today, "%Y%m%d"))

        uvfile = self.client.find(
            appkey, "uvfile",
            OrderedDict([("tm", yyyy_mm_dd),
                         ("jhd_userkey", {"$in": uids})]))

        user_profile = self.client.find(appkey, "UserProfile",
                                        {"_id": {"$in": uids}})

        # Resolve every IP of the batch to a {"prov", "city"} location.
        ips = set()
        ip_loc = {}
        try:
            for uid in data:
                # A user without IPs must not abort the whole lookup.
                ips.update(data[uid].get("jhd_ip") or [])
            ip_loc_cur = self.conn["jh"]["UserIP"].find(
                OrderedDict([("_id", {"$in": list(ips)}),
                             ("province", {"$exists": True}),
                             ("city", {"$exists": True})]),
                {"province": True, "city": True})
            for item in ip_loc_cur:
                province = item["province"]
                city = item["city"]
                if not province:
                    continue
                if not city:
                    # Fall back to the province when the city is empty.
                    city = province
                ip_loc.setdefault(item["_id"],
                                  {"prov": province, "city": city})
        except Exception:
            # Locations are best effort; continue with whatever was resolved.
            traceback.print_exc()

        # Merge the stored records into the batch.
        for doc in uvfile:
            uid = doc["jhd_userkey"]
            data[uid] = modetools.mergeUserCrumbs(doc, data[uid])
            try:
                known_locs = data[uid].setdefault("jhd_loc", [])
                for ip in data[uid]["jhd_ip"]:
                    loc = ip_loc.get(ip, None)
                    if loc and loc not in known_locs:
                        known_locs.append(loc)
            except Exception:
                traceback.print_exc()

        fix_deltaday = getDayDelta(today, "20160101")

        # Attach the first login time and recent-activity counters.
        for doc in user_profile:
            key = doc["_id"]
            first_login = doc.get("firstLoginTime", "unknown")
            measure = self.getMeasure(doc.get("activelifeabs", []),
                                      fix_deltaday)
            measure["firstLoginTime"] = first_login
            data[key] = dict(data[key],
                             firstLoginTime=first_login,
                             measure=measure)

        op = []
        for key in data:
            # Records that were not merged from a stored one get a new id.
            if "_id" not in data[key]:
                data[key]["_id"] = ObjectId()
            op.append(
                ReplaceOne({"_id": data[key]["_id"]},
                           modetools.formatList(data[key]), True))

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'uvfile' Rise a error; Switch to Single Mode")
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        try:
            kwargs["ip_loc"] = ip_loc
            # NOTE(review): store_attachmode is not defined in this class;
            # presumably inherited from ModeWriter -- confirm.
            self.store_attachmode(data, appkey, modename, modetools, *args,
                                  **kwargs)
        except Exception:
            traceback.print_exc()
Пример #8
0
def get_mongo_conn(appkey):
    """Return the Mongo connection of the instance that hosts *appkey*.

    The Mongo id is looked up through ``MysqlClient.get_mongoid``; the MySQL
    client is closed even when that lookup raises.
    """
    m_client = MysqlClient()
    try:
        mongo_id = m_client.get_mongoid(appkey)[0]
    finally:
        m_client.closeMysql()
    return PyMongoClient(mongo_id=mongo_id).getConn()