def __init__(self, client=None):
    """Bind the Mongo client and the fixed target namespace.

    :param client: client object to reuse; when ``None`` a fresh
        ``PyMongoClient()`` is created instead.
    """
    self.client = PyMongoClient() if client is None else client
    self.dbname = "jh"
    self.modename = "UserIP"
class UserIP(ModeWriter):
    '''
    Upsert one document per client IP into the ``jh.UserIP`` collection.

    Document layout written by ``write``::

        {
            _id: [ip],
            inctag: [number],
            timestamp: [number],
            appkey: [set of appkeys that reported this ip]
        }
    '''

    def __init__(self, client=None):
        """
        :param client: client object to reuse; a new ``PyMongoClient()`` is
            created when ``None``.
        """
        if client is None:
            self.client = PyMongoClient()
        else:
            self.client = client
        self.dbname = "jh"
        self.modename = "UserIP"

    def setClient(self, client):
        """Replace the client used for subsequent writes."""
        self.client = client

    def remove(self, *args, **kwargs):
        """No-op: this writer never deletes anything."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert every distinct IP found in ``data``.

        :param data: mapping whose values each carry a ``"jhd_ip"`` iterable
            of IPs (the keys are not used).
        :param appkey: value added to each IP document's ``appkey`` set.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        """
        modename = self.modename
        # De-duplicate so each IP is touched (and ``inctag`` bumped) once.
        ip_set = {ip for key in data for ip in data[key]["jhd_ip"]}
        op = []
        for ip in ip_set:
            query_update = {
                "$set": {"timestamp": time.time()},
                "$inc": {"inctag": 1},
                "$addToSet": {"appkey": appkey},
            }
            op.append(UpdateOne({"_id": ip}, query_update, upsert=True))
        try:
            if op:
                self.client.bulkWrite(self.dbname, modename, op)
        except Exception:
            # Best effort: retry the operations one at a time so that a
            # single bad request does not lose the whole batch.
            print(
                "Warn: bulkStore 'jh.UserIP' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(self.dbname, modename, [op_item])
                except Exception:
                    import traceback
                    # print_exc() writes the traceback itself and returns None.
                    traceback.print_exc()
class UserProfileUpdateWriter(ModeWriter):
    """Add resolved IP locations to the ``locs`` set of UserProfile documents."""

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Swap the client and refresh the cached connection from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """No-op."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Append ``{"prov", "city"}`` dicts to each user and push them to Mongo.

        Reads ``kwargs["ip_loc"]`` (ip -> location dict) and
        ``kwargs["today"]``.  ``data[uid]["locs"]`` is extended in place.
        ``modename`` is ignored in favour of ``self.modename``.
        """
        modename = self.modename
        started = time.time()
        ip_loc = kwargs["ip_loc"]
        # Value is not used below; the lookup is kept so a missing "today"
        # still raises KeyError exactly as before.
        curDay = kwargs["today"].replace("-", "")

        # Pass 1: translate every known IP of every user into a location dict.
        for uid in data:
            user = data[uid]
            for ip in user["jhd_ip"]:
                loc = ip_loc.get(ip, {})
                if loc:
                    user.setdefault("locs", []).append({
                        "prov": loc.get("prov", "#"),
                        "city": loc.get("city", "#"),
                    })

        # Pass 2: one $addToSet request per (user, location) pair.
        op = [
            UpdateOne({"_id": uid}, {"$addToSet": {"locs": loc}})
            for uid in data
            for loc in (data[uid].get("locs", []) or [])
        ]

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except:
                import traceback
                print(traceback.print_exc())
        print("UserProfileUpdateWriter cost seconds %.3f" % ((time.time() - started), ))
class UserEventWriter(ModeWriter):
    """Upsert raw event documents into ``UserEvent`` and forward them to the
    ``UserEventGroupWriter``."""

    def __init__(self):
        self.client = PyMongoClient()
        self.groupwriter = UserEventGroupWriter()
        self.modename = "UserEvent"

    def setClient(self, client):
        """Replace the client used by this writer (not the group writer's)."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete one day's events, then delegate to the group writer.

        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``.
        """
        modename = self.modename
        tm = tm.replace("-", "")
        # Round-trip through strptime/strftime to normalise the day string.
        tm = time.strftime(
            "%Y%m%d", time.localtime(time.mktime(time.strptime(tm, "%Y%m%d"))))
        self.client.remove(appkey, modename, {"partition_date": tm})
        self.groupwriter.remove(appkey, modename, tm)

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Stamp each event with ``partition_date`` and ``_id`` and upsert it.

        :param data: iterable of event dicts; each is modified in place.
        :param kwargs: optional ``today`` (``yyyy-mm-dd``); when absent the
            previous local day is used as the partition date.
        """
        a = time.time()
        modename = self.modename
        op = []
        for doc in data:
            try:
                doc["partition_date"] = kwargs["today"].replace("-", "") \
                    if "today" in kwargs else time.strftime(
                        "%Y%m%d", time.localtime(time.time() - 86400))
                jhd_userkey = doc["jhd_userkey"]
                jhd_ts = doc["jhd_ts"]
                jhd_eventId = doc["jhd_eventId"]
                # Deterministic id so replaying the same event overwrites
                # instead of duplicating; the event id is cut to 10 chars.
                _id = "_".join(
                    map(str, [jhd_userkey, jhd_ts, jhd_eventId[:10]]))
                doc["_id"] = _id
                op.append(ReplaceOne({"_id": _id}, doc, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print("UserEventWriter: skipped malformed doc: %r" % (doc, ))
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserEvent' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    import traceback
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        self.groupwriter.write(data, appkey, modename, modetools, *args,
                               **kwargs)
        print("UserEventWriter cost seconds %.3f" % ((time.time() - a), ))
def __init__(self, mongo_id=1):
    """Open the Mongo client/connection and register the attached writers.

    :param mongo_id: forwarded to ``PyMongoClient(mongo_id=...)``.
    """
    self.client = PyMongoClient(mongo_id=mongo_id)
    self.conn = self.client.getConn()
    self.modename = "uvfile"
    # Starts empty so the attribute exists even if UserIP() cannot be built.
    self.attachmode_storers = []
    try:
        storers = [UserIP()]
        self.attachmode_storers = storers
    except:
        import traceback
        print(traceback.print_exc())
class UserActiveUpdateWriter(ModeWriter):
    """Record, per user, the day offsets (relative to 2016-01-01) on which the
    user was seen, in the ``UserActiveUpdate`` collection."""

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActiveUpdate"

    def setClient(self, client):
        """Replace the client used for subsequent writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """No-op."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Add today's day offset to ``activelifeabs`` of every user in ``data``.

        :param data: mapping keyed by user key; only the keys are used.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd``).
        """
        a = time.time()
        modename = self.modename
        curDay = kwargs["today"].replace("-", "")
        activelifeabs = getDayDelta(curDay, "20160101")
        update_query = {"$addToSet": {"activelifeabs": activelifeabs}}
        op = []
        for key in data:  # key is the user key and doubles as the _id
            try:
                op.append(UpdateOne({"_id": key}, update_query, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print("UserActiveUpdateWriter: skipped %r: %r" %
                      (key, data[key]))
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            import traceback
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActiveUpdate' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserActiveUpdateWriter cost seconds %.10f" %
              ((time.time() - a), ))
def usetimeDistribute(num, appkey="BIQU_ANDROID", delta=120):
    """Estimate per-user active seconds for one day from ``uvfile`` timestamps.

    For every ``uvfile`` document of the day ``num`` days ago, the distinct
    ``opatm`` values (``HH:MM:SS`` strings) of the action/page/in/end
    counters are sorted, and the gaps between consecutive ones that are
    ``<= delta`` seconds are summed.  Diagnostics are printed per user.

    :param num: how many days back from now the inspected day lies.
    :param appkey: database name passed to ``client.find``.
    :param delta: largest gap (seconds) still counted as continuous use.
    :return: dict of uid -> summed seconds, for users whose sum is non-zero.
    """
    # The date part is only an anchor for turning HH:MM:SS into epoch
    # seconds; the differences are what matter.
    curday = datetime.datetime.today().strftime("%Y%m%d")
    dayStr = time.strftime("%Y-%m-%d",
                           time.localtime(time.time() - 86400 * num))
    client = PyMongoClient()
    result = {}
    opas = ("action", "page", "in", "end")
    m, n = 0, 0  # users with end_sum >= 600 / users below that
    for item in client.find(appkey, "uvfile", {"tm": dayStr}):
        uid = item["jhd_userkey"]
        end_sum = item["item_add"].get("end", 0)
        opatms = sorted({
            opatm
            for opa in opas
            for opatm in item["item_count"].get(opa, {}).get("opatm", [])
        })
        opsdtsmps = [
            int(time.mktime(
                time.strptime(curday + opatm, "%Y%m%d%H:%M:%S")))
            for opatm in opatms
        ]
        # Leading 0 keeps the list non-empty; the rest are consecutive gaps.
        tmp = [0]
        tmp.extend(b - a for a, b in zip(opsdtsmps, opsdtsmps[1:]))
        total_opatm = sum(gap for gap in tmp if gap <= delta)

        print(uid, end_sum, total_opatm, tmp)
        print(uid, end_sum, total_opatm, opatms)
        if end_sum >= 600:
            m += 1
            # The original printed the comprehension variable leaked by
            # Python 2, i.e. the last gap; spell that out explicitly.
            print(tmp[-1], end_sum)
        else:
            n += 1

        if total_opatm != 0:
            result.setdefault(uid, total_opatm)
    print(m, n)
    return result
class IPStorageMG(IPStorage):
    """Store resolved IP attributes on existing ``jh.UserIP`` documents."""

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()

    def store(self, _id, **kwargs):
        """``$set`` the given fields on the document ``_id`` (no upsert)."""
        op = UpdateOne({"_id": _id}, {"$set": dict(kwargs)}, False)
        self.client.bulkWrite("jh", "UserIP", [op])

    def storeItem(self, _id, key, value):
        """Not implemented; kept as a no-op."""
        pass

    def bulkStore(self, data):
        """``$set`` fields for many documents in one bulk request.

        :param data: mapping of ``_id`` -> dict of fields to set.  Nothing is
            sent when it is empty, matching the ``if op:`` guard used by the
            other writers before calling ``bulkWrite``.
        """
        op = [
            UpdateOne({"_id": _id}, {"$set": dict(data[_id])}, False)
            for _id in data
        ]
        if op:
            self.client.bulkWrite("jh", "UserIP", op)
class IPLoaderFromMG(IPLoader):
    '''
    Load the IP addresses that still need to be located -> IPContainer (StackSet)
    '''

    def __init__(self):
        self.client = PyMongoClient()
        self.IPContainer = StackSet()

    def load(self):
        """Start ``load_thread`` in a daemon thread and return immediately."""
        worker = threading.Thread(
            target=self.load_thread, name="Thread_LoadIPFromMG")
        worker.setDaemon(True)
        worker.start()

    def load_thread(self, once_sleep=30):
        """Poll ``jh.UserIP`` forever and push un-located IPs into the container.

        :param once_sleep: seconds to sleep between polling rounds.
        """
        logger.info("IP Loader Starting.......")
        total_read = 0
        while True:
            # Back off while the container already holds 10000 or more items.
            if self.IPContainer.size() < 10000:
                size_before = self.IPContainer.size()
                # IPs touched within the last 5 minutes that have no city yet.
                criteria = {
                    "timestamp": {"$gte": time.time() - 5 * 60},
                    "city": {"$exists": False},
                }
                for record in self.client.find("jh", "UserIP", criteria):
                    self.IPContainer.push(record["_id"])
                    total_read += 1
                size_after = self.IPContainer.size()
                logger.info(
                    "Total Read IP: %s, IPContainer has ip: %d, load ip: %d" %
                    (total_read, size_after, size_after - size_before))
            time.sleep(once_sleep)

    def iter(self):
        """Yield IPs popped from the container, waiting 10 s whenever it is empty."""
        while True:
            try:
                item = self.IPContainer.pop()
            except IndexError:
                import traceback
                logger.warning("IPContainer is empty!")
                time.sleep(10)
            else:
                yield item
def getData(self, appkey, *args, **kwargs):
    """Read ``UserMapMeta`` and return the map-field metadata per event id.

    Stored documents look like::

        { "_id" : "ac7",
          "fields" : [ { "type" : "string", "name" : "type" } ],
          "field_elems" : { "type" : [ ... ] } }

    :return: ``{eventid: {field_name: {"type": ..., "elems": [...]}}}``;
        events without any field map to ``{}``.
    """
    modename = "UserMapMeta"
    self.get_mongoid(appkey)
    self.client = PyMongoClient(self.mongo_id)
    collection = self.client.getConn()[appkey][modename]
    result = {}
    for item in collection.find({}):
        eventid = item["_id"]
        fields = item["fields"]
        field_elems = item.get("field_elems", {})
        # Keep the type and the sample elements of every field.
        for field in fields:
            try:
                mapkey = field["name"]
                mapkey_type = field["type"]
                kept = []
                for elem in field_elems.get(mapkey, []):
                    is_text = isinstance(elem, str) or isinstance(elem, unicode)
                    # Drop URL-like values and strings of 60+ characters.
                    if is_text and ("http" in elem or len(elem) >= 60):
                        continue
                    kept.append(elem)
                    if len(kept) >= 100:
                        break
                result.setdefault(eventid, {}).setdefault(mapkey, {
                    "type": mapkey_type,
                    "elems": kept,
                })
            except:
                continue
        # Events that carry no map fields still get an (empty) entry.
        if not fields:
            result.setdefault(eventid, {})
    return result
def __init__(self, mongo_id=1):
    """Open the client/connection and fix the target to ``jh.UserIP``.

    :param mongo_id: forwarded to ``PyMongoClient(mongo_id=...)``.
    """
    client = PyMongoClient(mongo_id=mongo_id)
    self.client = client
    self.conn = client.getConn()
    self.dbname, self.modename = "jh", "UserIP"
def __init__(self, mongo_id=1):
    """Create the Mongo client and target the ``UserMapMeta`` collection.

    :param mongo_id: forwarded to ``PyMongoClient(mongo_id=...)``.
    """
    client = PyMongoClient(mongo_id=mongo_id)
    self.client = client
    self.modename = "UserMapMeta"
def __init__(self):
    """Create the default ``PyMongoClient`` used by this object."""
    client = PyMongoClient()
    self.client = client
def __init__(self):
    """Create the default client and target ``UserActiveUpdate``."""
    client = PyMongoClient()
    self.client = client
    self.modename = "UserActiveUpdate"
class UserMapMetaWriter(ModeWriter):
    """Accumulate event map-field definitions and sample values in the
    ``UserMapMeta`` collection."""

    def __init__(self, mongo_id=1):
        """
        :param mongo_id: forwarded to ``PyMongoClient(mongo_id=...)``.
        """
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.modename = "UserMapMeta"

    def setClient(self, client):
        """Replace the client used for subsequent writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """No-op."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge field definitions and sample elements into ``UserMapMeta``.

        :param data: iterable of dicts with ``_id``, ``fields`` (list) and
            ``field_elems`` (field name -> list of values).  The dicts are
            consumed: ``fields``, ``_id`` and every ``field_elems`` entry are
            popped from them.
        :param modename: ignored; ``self.modename`` is always used.
        """
        a = time.time()
        modename = self.modename
        op = []
        for _data in data:
            try:
                if not _data:
                    continue
                update_query = {
                    "$addToSet": {
                        "fields": {"$each": _data.pop("fields")}
                    }
                }
                _id = _data.pop("_id")
                op.append(UpdateOne({"_id": _id}, update_query, True))
                # Snapshot the keys: entries are popped inside the loop, and
                # mutating a dict while iterating it raises on Python 3.
                for key in list(_data["field_elems"]):
                    update_query = {
                        "$addToSet": {
                            "field_elems.%s" % key: {
                                "$each": list(_data["field_elems"].pop(key))
                            }
                        }
                    }
                    # Only update while array index 100 does not exist, i.e.
                    # stop growing a field once it holds more than 100 items.
                    # NOTE(review): with upsert=True a non-matching filter on
                    # an existing _id presumably attempts an insert and hits
                    # a duplicate-key error - confirm whether upsert is
                    # intended here.
                    op.append(
                        UpdateOne(
                            {
                                "_id": _id,
                                "field_elems.%s.100" % (key, ): {
                                    "$exists": False
                                }
                            }, update_query, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print("UserMapMetaWriter: skipped record: %r" % (_data, ))
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserMapMeta' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    import traceback
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserMapMetaWriter cost seconds %.10f" % ((time.time() - a), ))
class UserCrumbsWriter(ModeWriter):
    """Merge the day's per-user crumbs into the ``uvfile`` collection, enriched
    with IP locations and recent-activity counters."""

    def __init__(self, mongo_id=1):
        """
        :param mongo_id: forwarded to ``PyMongoClient(mongo_id=...)``.
        """
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "uvfile"
        self.attachmode_storers = []
        try:
            self.attachmode_storers = [UserIP()]
        except Exception:
            import traceback
            traceback.print_exc()

    def setClient(self, client):
        """Swap the client and refresh the cached connection from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Delete the ``uvfile`` documents of one day.

        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``; documents are
            matched on ``tm`` in ``yyyy-mm-dd`` form.
        """
        modename = self.modename
        tm = tm.replace("-", "")
        tm = time.strftime(
            "%Y-%m-%d", time.localtime(time.mktime(time.strptime(tm, "%Y%m%d"))))
        self.client.remove(appkey, modename, {"tm": tm})

    def getMeasure(self, activelifeabs, fix_deltaday):
        """Count active days inside the trailing 7/14/28/30-day windows.

        :param activelifeabs: iterable of absolute day offsets on which the
            user was active.
        :param fix_deltaday: day offset of the reference day; later offsets
            are ignored.
        :return: dict with ``last7ActiveNum``, ``last14ActiveNum``,
            ``last28ActiveNum`` and ``last30ActiveNum``.
        """
        windows = (
            ("last7ActiveNum", 6),
            ("last14ActiveNum", 13),
            ("last28ActiveNum", 27),
            ("last30ActiveNum", 29),
        )
        measure = dict((name, 0) for name, _ in windows)
        for active_day in activelifeabs:
            if active_day > fix_deltaday:
                continue
            delta = fix_deltaday - active_day
            for name, limit in windows:
                if delta <= limit:
                    measure[name] += 1
        return measure

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` with stored ``uvfile``/``UserProfile`` state and upsert.

        :param data: mapping of user key -> crumbs dict; updated in place.
        :param modetools: provides ``mergeUserCrumbs`` and ``formatList``.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd``); ``ip_loc`` is
            added before the attached writers are invoked.
        """
        modename = self.modename
        today = kwargs["today"].replace("-", "")
        # A real list, so the value can be embedded in a query document.
        uids = list(data)
        # Format the day directly; the former -86400/+86400 round trip could
        # land on the wrong date around DST changes.
        yyyy_mm_dd = time.strftime("%Y-%m-%d", time.strptime(today, "%Y%m%d"))
        uvfile = self.client.find(
            appkey, "uvfile",
            OrderedDict([("tm", yyyy_mm_dd), ("jhd_userkey", {"$in": uids})]))
        user_profile = self.client.find(appkey, "UserProfile",
                                        {"_id": {"$in": uids}})

        # Resolve every IP seen today to {"prov", "city"} where known.
        ips = set()
        ip_loc = {}
        try:
            for uid in data:
                ips = ips.union(data[uid].get("jhd_ip"))
            ip_loc_cur = self.conn["jh"]["UserIP"].find(
                OrderedDict([("_id", {"$in": list(ips)}),
                             ("province", {"$exists": True}),
                             ("city", {"$exists": True})]),
                {"province": True, "city": True})
            for item in ip_loc_cur:
                ip = item["_id"]
                province = item["province"]
                city = item["city"]
                if not province:
                    continue
                if not city:
                    city = province
                ip_loc.setdefault(ip, {"prov": province, "city": city})
        except Exception:
            import traceback
            traceback.print_exc()

        # Merge with what is already stored for the day.
        for doc in uvfile:
            uid = doc["jhd_userkey"]
            data[uid] = modetools.mergeUserCrumbs(doc, data[uid])
            try:
                ip_lis = data[uid]["jhd_ip"]
                data[uid].setdefault("jhd_loc", [])
                for ip in ip_lis:
                    loc = ip_loc.get(ip, None)
                    if loc and loc not in data[uid]["jhd_loc"]:
                        data[uid]["jhd_loc"].append(loc)
            except Exception:
                import traceback
                traceback.print_exc()

        fix_deltaday = getDayDelta(today, "20160101")
        for doc in user_profile:
            key = doc["_id"]
            tmp = {}
            # First login time of the user.
            tmp["firstLoginTime"] = doc.get("firstLoginTime", "unknown")
            # Recent-activity counters of the user.
            tmp["measure"] = self.getMeasure(
                doc.get("activelifeabs", []), fix_deltaday)
            tmp["measure"]["firstLoginTime"] = tmp["firstLoginTime"]
            data[key] = dict(data[key], **tmp)

        op = []
        for key in data:
            if "_id" not in data[key]:
                data[key]["_id"] = ObjectId()
            _id = data[key]["_id"]
            op.append(
                ReplaceOne({"_id": _id}, modetools.formatList(data[key]), True))
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'uvfile' Rise a error; Switch to Single Mode")
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)

        try:
            kwargs["ip_loc"] = ip_loc
            # NOTE(review): store_attachmode is not defined in this class;
            # presumably inherited from ModeWriter - confirm.
            self.store_attachmode(data, appkey, modename, modetools, *args,
                                  **kwargs)
        except Exception:
            import traceback
            traceback.print_exc()
def __init__(self):
    """Create the default client and cache the connection obtained from it."""
    client = PyMongoClient()
    self.client = client
    self.conn = client.getConn()
def __init__(self):
    """Create the client, the companion group writer, and target ``UserEvent``."""
    client = PyMongoClient()
    self.client = client
    self.groupwriter = UserEventGroupWriter()
    self.modename = "UserEvent"
def __init__(self):
    """Create the default client and target ``UserEventGroup``."""
    client = PyMongoClient()
    self.client = client
    self.modename = "UserEventGroup"
class UserProfileWriter(ModeWriter):
    """Merge the day's user data into ``UserProfile`` and maintain the
    ``activelife`` / ``activelifeabs`` day-offset lists."""

    def __init__(self, mongo_id=1):
        """
        :param mongo_id: forwarded to ``PyMongoClient(mongo_id=...)``.
        """
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Swap the client and refresh the cached connection from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """No-op."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` with stored profiles and upsert one document per user.

        :param data: mapping of user key -> profile dict; updated in place.
        :param modetools: provides ``mergeUserProfile``.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd``).
        """
        a = time.time()
        # Value is unused; the lookup is kept so a missing "today" still
        # raises KeyError up front as before.
        curDay = kwargs["today"].replace("-", "")
        modename = self.modename
        uids = list(data)
        docs = self.client.find(appkey, modename, {"_id": {"$in": uids}})
        oldusers = set()
        for doc in docs:
            try:
                # The UserProfile _id is the user key.
                key = doc["_id"]
                oldusers.add(key)
                # Reset per user so a value left over from the previous
                # iteration can never be used for this one.
                lastLoginTime_new = None
                if "lastLoginTime" in data[key]:
                    lastLoginTime_new = data[key]["lastLoginTime"][:8]
                # If the incoming data predates the stored first login,
                # shift the stored offsets to the new origin.
                if "firstLoginTime" in doc and "firstLoginTime" in data[key]:
                    activelife = doc.get("activelife", [0])
                    firstLoginTime_new = data[key]["firstLoginTime"][:8]
                    firstLoginTime_old = doc["firstLoginTime"][:8]
                    if firstLoginTime_new < firstLoginTime_old:
                        firstLoginDelta = getDayDelta(firstLoginTime_old,
                                                      firstLoginTime_new)
                        doc["activelife"] = [
                            i + firstLoginDelta for i in activelife
                        ]
                data[key] = modetools.mergeUserProfile(data[key], doc)
                # Build the life-cycle data of the user.
                firstLoginDay = data[key]["firstLoginTime"][:8]
                lastLoginDay = data[key]["lastLoginTime"][:8]
                if lastLoginTime_new is None:
                    # Incoming data carried no last login: fall back to the
                    # merged value.
                    lastLoginTime_new = lastLoginDay
                dayDelta = getDayDelta(lastLoginTime_new, firstLoginDay)
                data[key].setdefault("activelife", [0])  # legacy documents
                if dayDelta not in data[key]["activelife"]:
                    data[key]["activelife"].append(dayDelta)
                    data[key]["activelife"].sort()
                # Absolute activity offsets; day 0 is 2016-01-01.
                try:
                    firstlogin_deltaday = getDayDelta(firstLoginDay,
                                                      "20160101")
                    data[key]["activelifeabs"] = [
                        firstlogin_deltaday + remain_day
                        for remain_day in data[key]["activelife"]
                    ]
                except Exception:
                    import traceback
                    traceback.print_exc()
            except Exception:
                import traceback
                traceback.print_exc()

        # Users not found in the collection are new: record their first version.
        for key in set(uids) - oldusers:
            data[key]["comever"] = data[key]["ver"]
            # Absolute activity offsets; day 0 is 2016-01-01.
            try:
                # For a new user the first login day is taken from
                # lastLoginTime.
                firstLoginDay = data[key]["lastLoginTime"][:8]
                firstlogin_deltaday = getDayDelta(firstLoginDay, "20160101")
                data[key]["activelifeabs"] = [
                    firstlogin_deltaday + remain_day
                    for remain_day in data[key]["activelife"]
                ]
            except Exception:
                import traceback
                traceback.print_exc()

        op = [ReplaceOne({"_id": key}, data[key], True) for key in data]
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserProfileWriter cost seconds %.3f" % ((time.time() - a), ))
def get_mongo_conn(appkey):
    """Return the Mongo connection of the server that hosts ``appkey``.

    The mongo id is looked up through ``MysqlClient.get_mongoid`` (first
    element of its result); the MySQL client is closed even when that
    lookup fails.
    """
    m_client = MysqlClient()
    try:
        mongo_id = m_client.get_mongoid(appkey)[0]
    finally:
        m_client.closeMysql()
    return PyMongoClient(mongo_id=mongo_id).getConn()
def write(self, dataDict, appkey, modename, modetools, *args, **kwargs):
    """Export one tab-separated line per user to the day's uvfile dump.

    Each entry of ``dataDict`` is first completed from the day's ``uvfile``
    documents (first/last login, publisher, locations), then written via
    ``JHWrite``.

    :param dataDict: mapping of user key -> aggregated dict; updated in place.
    :param kwargs: must contain ``num``, the number of days back from now.
    """
    num = kwargs["num"]
    client = PyMongoClient(self.mongo_id)
    uvfile_path = get_uvfile_path(num, appkey)
    cur_day = time.strftime("%Y-%m-%d",
                            time.localtime(time.time() - 86400 * num))
    uids = list(dataDict)
    uvfile = client.findElemIn(
        appkey, "uvfile", "jhd_userkey", uids,
        OrderedDict([("tm", cur_day)]), {
            "_id": False,
            "jhd_userkey": True,
            "jhd_loc": True,
            "firstLoginTime": True,
            "lastOpaTime": True,
            "jhd_pb": True
        })
    for item in uvfile:
        try:
            uid = item["jhd_userkey"]
            comepub = item.get("jhd_pb", ["#"])
            data = dataDict[uid]
            data["firstLoginTime"] = item.get("firstLoginTime", "#")
            data["lastLoginTime"] = item.get("lastOpaTime", "#")
            data["comepub"] = comepub[0] if comepub else "#"
            locs = item.get("jhd_loc", None)
            if locs:
                data["locs"] = []
                for loc_item in locs:
                    # Locations are either {"prov", "city"} dicts or
                    # "prov_city" strings.
                    if isinstance(loc_item, dict):
                        prov = loc_item.get("prov", "#")
                        city = loc_item.get("city", "#")
                    else:
                        prov, city = loc_item.split("_")
                    if (prov, city) not in data["locs"]:
                        data["locs"].append((prov, city))
        except Exception:
            import traceback
            traceback.print_exc()

    for uid in dataDict:
        data = dataDict[uid]
        actions = dict(
            (name, data["action"][name][0]) for name in data["action"])
        pages = dict((name, data["page"][name][0]) for name in data["page"])
        # Column order is part of the file format (1-based positions).
        line = [
            uid,                                          # 1
            data["isactive"],
            data["comepub"],
            "#".join(list(data["curpubs"])),
            data["plat"],                                 # 5
            "#".join(list(data["vers"])),
            data["ua"],
            "#".join(list(data["net"])),
            data["firstLoginTime"],
            data["lastLoginTime"],                        # 10
            "#".join("_".join(loc) for loc in data["locs"]),
            data["in"][0],
            "#".join(map(str, data["end"])) if data["end"] else "#",
            json.dumps(actions),
            json.dumps(pages),                            # 15
            data["pushid"],                               # 16
        ]
        JHWrite(uvfile_path, "\t".join(map(str, line)))
    JHWrite.finished(iszip=True)
def __init__(self):
    """Create the default client and an empty ``StackSet`` for pending IPs."""
    client = PyMongoClient()
    self.client = client
    self.IPContainer = StackSet()
from apscheduler.triggers.cron import CronTrigger from apscheduler.events import ( EVENT_SCHEDULER_STARTED, EVENT_SCHEDULER_SHUTDOWN, EVENT_SCHEDULER_PAUSED, EVENT_SCHEDULER_RESUMED, EVENT_EXECUTOR_ADDED, EVENT_EXECUTOR_REMOVED, EVENT_JOBSTORE_ADDED, EVENT_JOBSTORE_REMOVED, EVENT_ALL_JOBS_REMOVED, EVENT_JOB_ADDED, EVENT_JOB_REMOVED, EVENT_JOB_MODIFIED, EVENT_JOB_EXECUTED, EVENT_JOB_ERROR, EVENT_JOB_MISSED, EVENT_JOB_SUBMITTED, EVENT_JOB_MAX_INSTANCES) from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor from JobHandler import JobHandler from DBClient.PyMongoClient import PyMongoClient global _mongoclient _mongoclient = PyMongoClient().getConn() class SchedulerManager(object): global _mongoclient def __init__(self): self._jobs = {} self._jobhandlers = {} # format, key: jobid, value: jobhandler self.create_scheduler() self.start() def create_scheduler(self): self.jobstores = { 'mongo':
class ModeWriteMongoUserIP(ModeWriter):
    """Copy freshly resolved IP locations from ``jh.UserIP`` into the ``uvfile``
    documents of every appkey that reported the IP."""

    def __init__(self, mongo_id=1):
        """
        :param mongo_id: forwarded to ``PyMongoClient(mongo_id=...)``.
        """
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.dbname = "jh"
        self.modename = "UserIP"

    def remove(self, *args, **kwargs):
        """No-op."""
        pass

    def write(self, *args, **kwargs):
        '''
        :param args: reserved
        :param kwargs: today = the current day (yyyy-mm-dd)
        :return:
        '''
        today = kwargs["today"]
        # IPs touched within the last 100 seconds that already carry a
        # province, a city and at least the appkey field.
        cur = self.conn[self.dbname][self.modename].find(
            {
                "timestamp": {"$gte": time.time() - 100},
                "province": {"$exists": True},
                "city": {"$exists": True},
                "appkey": {"$exists": True}
            }, {
                "province": True,
                "city": True,
                "appkey": True
            })
        update_appkey = {}
        for item in cur:
            ip = item["_id"]
            province = item["province"]
            city = item["city"]
            if not province:
                continue
            if not city:
                city = province
            for a_appkey in item["appkey"]:
                update_appkey.setdefault(a_appkey, []).append(
                    UpdateOne(
                        # Field order matters: it should line up with a
                        # matching index to keep the update fast.
                        OrderedDict([("tm", today),
                                     ("jhd_loc", {"$exists": False}),
                                     ("jhd_ip", ip)]),
                        {
                            "$addToSet": {
                                "jhd_loc": {
                                    "prov": province,
                                    "city": city
                                }
                            }
                        }))
        for a_appkey in update_appkey:
            a = time.time()
            self.client.bulkWrite(a_appkey, "uvfile", update_appkey[a_appkey])
            print(__name__, time.time() - a, a_appkey,
                  len(update_appkey[a_appkey]))
class UserActiveWriter(ModeWriter):
    """Maintain one ``UserActive`` document per user and day that carries the
    user's running list of daily active flags."""

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActive"

    def setClient(self, client):
        """Replace the client used for subsequent writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete the ``UserActive`` documents of one day.

        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``.
        """
        modename = self.modename
        tm = tm.replace("-", "")
        # Round-trip through strptime/strftime to normalise the day string.
        tm = time.strftime(
            "%Y%m%d", time.localtime(time.mktime(time.strptime(tm, "%Y%m%d"))))
        self.client.remove(appkey, modename, {"partition_date": tm})

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Build today's record for every ``UserProfile`` document and upsert it.

        :param data: unused; all users are read from ``UserProfile``.
        :param kwargs: optional ``today`` (``yyyy-mm-dd``); the previous
            local day is used when absent.
        """
        started = time.time()
        modename = self.modename
        curDay = kwargs["today"].replace(
            "-", "") if "today" in kwargs else time.strftime(
                "%Y%m%d", time.localtime(time.time() - 86400))
        conn = self.client.getConn()
        userActiveCollection = conn[appkey][modename]
        docs = self.client.find(appkey, "UserProfile", {})
        yesterday = getDay(curDay, "%Y%m%d", -1)
        op = []
        # Separate timer so the overall duration printed at the end still
        # covers the setup above.
        loop_started = time.time()
        for doc in docs:
            try:
                key = doc["_id"]
                activelife = doc.get("activelife", [0])
                firstLoginTime = doc["firstLoginTime"][:8]
                login = getDayDelta(curDay, firstLoginTime) in activelife
                # Previous day's record of the user; needs the index
                # db.UserActive.ensureIndex({partition_date: -1, jh_uid: 1})
                # to be fast.
                userActive = userActiveCollection.find_one({
                    "jh_uid": key,
                    "partition_date": yesterday
                })
                # Build today's record.
                newUserActive = UserActiveBuilder()
                newUserActive.setJhdUid(key)
                newUserActive.setPartitionDate(curDay)
                if userActive is None:
                    newUserActive.setActive([1] if login else [0])
                else:
                    userActive["active"].append(1 if login else 0)
                    newUserActive.setActive(userActive["active"])
                newUserActive.setFirstLoginTime(doc["firstLoginTime"])
                newUserActive.setLastLoginTime(doc["lastLoginTime"])
                op.append(
                    ReplaceOne({
                        "jh_uid": key,
                        "partition_date": curDay
                    }, newUserActive.builder(), upsert=True))
            except Exception:
                import traceback
                traceback.print_exc()
                print("UserActiveWriter: skipped profile: %r" % (doc, ))
        print("find cost time: %d" % int(time.time() - loop_started))
        try:
            if op:
                userActiveCollection.bulk_write(op)
        except Exception:
            import traceback
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    userActiveCollection.bulk_write([op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserActiveWriter cost seconds %.10f" %
              ((time.time() - started), ))
def __init__(self):
    """Create the client, cache its connection, and target ``UserProfile``."""
    client = PyMongoClient()
    self.client = client
    self.conn = client.getConn()
    self.modename = "UserProfile"
{ "_id": _id, "field_elems.%s.100" % (key, ): { "$exists": False } }, update_query, True)) except: import traceback print(traceback.print_exc(), op) try: if op: op = self.client.bulkWrite(appkey, modename, op) except: print( "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode" ) for op_item in op: try: self.client.bulkWrite(appkey, modename, [op_item]) except: import traceback print(traceback.print_exc()) # self.client.getConn()[appkey][modename].remove({"field_elems": {"$exists": True}}) finallyMask(appkey, modename, self.client) print("UserMapMetaWriter cost seconds %.10f" % ((time.time() - a), )) if __name__ == "__main__": client = PyMongoClient() uewriter = dropIndex("caiyu_ios_free", "UserEvent", client) # uewriter.remove("feeling", )