# Example #1
0
class Collection(object):
    """Wrapper around a MongoDB-style collection with optional queued writes.

    Synchronous calls (``insert``, ``update``, ``find``, ``find_one``) go
    straight to ``self.collection``.  The ``*_asyn`` calls push work onto a
    queue that a background thread drains in batches.

    ``QueueManager``, ``hash_object``, ``obj`` and ``Empty`` are not defined
    in this class; they must be provided by the enclosing module.
    """

    def __init__(self, collection, queue=None, **kwargs):
        """Store the target collection; the async machinery starts lazily.

        Args:
            collection: object exposing ``insert_one``, ``insert_many``,
                ``update_one``, ``find`` and ``find_one``.
            queue: queue type name later handed to ``initialize``; a falsy
                value selects ``"python_queue"``.
            **kwargs: extra keyword arguments forwarded to the queue factory.
        """
        self.collection = collection
        self.runable = False
        self._queue_set = queue
        self.kwargs = kwargs

    def initialize(self, queue, lsize=50, timeout=60, **kwargs):
        """Create the queue, drop stale entries and start the worker thread.

        Args:
            queue: queue type name; falsy means ``"python_queue"``.
            lsize: upper bound on the number of entries taken per batch.
            timeout: seconds the worker waits on ``queue.get`` before it
                gives up and stops.
            **kwargs: forwarded to ``QueueManager.Queue``.

        Raises:
            Exception: whatever ``QueueManager.Queue`` raises.  For an
                explicitly requested queue type a hint listing the known
                types is printed first.
        """
        self.QM = QueueManager("0.0.0.0", port=9998)
        if not queue:
            self.queue = self.QM.Queue(queue_type="python_queue", **kwargs)
        else:
            try:
                self.queue = self.QM.Queue(queue_type=queue, **kwargs)
            except Exception:
                # Re-raise instead of continuing: without a queue the code
                # below would only fail later with an unrelated
                # AttributeError on ``self.queue``.
                print("queue type: `python_queue`, `redis_queue`")
                raise
        self.asyn_collection = None
        self.lsize = lsize
        self.timeout = timeout
        self.l_list = []  # documents waiting for a bulk insert
        self.u_list = []  # objects waiting for an update

        # Clear all old queue data.
        while self.queue.qsize():
            self.queue.get()

        # Mark as running before the worker starts so its loop is entered.
        self.runable = True
        self.t = threading.Thread(target=self._run_single)
        self.t.start()

    @staticmethod
    def _get_dict(ob):
        """Return the attributes of ``ob`` as a document dict.

        Without an ``_origin`` attribute the object's own ``__dict__`` is
        returned (not a copy).  Otherwise a copy without ``_origin`` is
        returned and ``ob`` is left untouched.
        """
        if not hasattr(ob, "_origin"):
            return ob.__dict__
        document = dict(ob.__dict__)
        # ``_origin`` may be visible through the class only, hence a default.
        document.pop("_origin", None)
        return document

    @staticmethod
    def _get_update_and_remove_dict(ob):
        """Diff ``ob`` against its ``_origin`` snapshot.

        Side effect: ``_origin`` is deleted from ``ob`` before it is hashed.

        Returns:
            ``[update_dict, remove_dict]``.  ``update_dict`` maps new or
            changed keys to their current attribute values; ``remove_dict``
            maps keys that disappeared to ``1``.
        """
        origin_hash = ob._origin
        delattr(ob, "_origin")
        now_hash = hash_object(ob)
        origin_keys = set(origin_hash.keys())
        now_keys = set(now_hash.keys())

        # Keys present in the snapshot but gone now.
        remove_dict = {key: 1 for key in origin_keys - now_keys}

        # Keys that are new, or whose hash differs from the snapshot.
        update_dict = {}
        for key in now_keys:
            if key not in origin_keys or origin_hash[key] != now_hash[key]:
                update_dict[key] = getattr(ob, key)

        return [update_dict, remove_dict]

    def set_collection(self, client, db, collection):
        """Point this wrapper at ``client``'s database ``db`` / ``collection``."""
        self.collection = client.get_database(db).get_collection(collection)

    def qsize(self):
        """Return the current queue size.

        Raises:
            Exception: if no queue has been created yet.
        """
        if not hasattr(self, "queue"):
            raise Exception("not use asyn feature, have no queue")
        return self.queue.qsize()

    def insert(self, ob):
        """Insert ``ob`` synchronously."""
        self.collection.insert_one(self._get_dict(ob))

    def _ensure_running(self, lsize, timeout):
        """Start the async machinery if the worker is not marked running."""
        if not self.runable:
            self.initialize(queue=self._queue_set,
                            lsize=lsize,
                            timeout=timeout,
                            **self.kwargs)

    def insert_asyn(self, ob, lsize=50, timeout=60):
        """Queue ``ob`` for insertion by the worker thread.

        ``lsize`` and ``timeout`` are only used when this call has to start
        the worker.
        """
        self._ensure_running(lsize, timeout)
        self.queue.put([self.collection, "insert", ob])

    def update(self, ob):
        """Update ``ob`` synchronously.

        Raises:
            Exception: if ``ob`` has no ``_id`` or no ``_origin`` attribute.
        """
        if not hasattr(ob, "_id"):
            raise Exception("not a normal mongo item")
        self._real_update(ob)

    def update_asyn(self, ob, lsize=50, timeout=60):
        """Queue ``ob`` for an update by the worker thread.

        ``lsize`` and ``timeout`` are only used when this call has to start
        the worker.
        """
        self._ensure_running(lsize, timeout)
        self.queue.put([self.collection, "update", ob])

    def find(self, json=None, item=None, limit=0, skip=0):
        """Yield one ``obj`` per matching document.

        Args:
            json: filter passed to ``collection.find``; ``None`` means ``{}``.
            item: projection; only passed on when truthy.
            limit: maximum number of results; ``0`` applies no limit.
            skip: number of results to skip.

        Yields:
            ``obj(**document)`` for every document.  If the cursor object is
            falsy a single ``None`` is yielded instead.
        """
        query = {} if json is None else json
        if item:
            cursor = self.collection.find(query, item)
        else:
            cursor = self.collection.find(query)
        cursor = cursor.skip(skip)
        if limit:
            cursor = cursor.limit(limit)

        if not cursor:
            yield None
        else:
            for document in cursor:
                yield obj(**document)

    def find_one(self, json=None, item=None):
        """Return the first matching document as ``obj``, or ``None``.

        Args:
            json: filter passed to ``collection.find_one``; ``None`` means
                ``{}``.
            item: projection; only passed on when truthy.
        """
        query = {} if json is None else json
        if item:
            result = self.collection.find_one(query, item)
        else:
            result = self.collection.find_one(query)
        if not result:
            return None
        return obj(**result)

    def close(self):
        """Wait for queued work, stop the worker and shut the manager down.

        Does nothing when the worker is not marked running.  If the worker
        thread is no longer alive the wait is skipped, so entries still in
        the queue are left unprocessed rather than blocking forever.
        """
        if not self.runable:
            return

        worker = self.t
        # Let the worker drain pending entries, but never wait on a dead one.
        while self.queue.qsize() and worker.is_alive():
            time.sleep(0.2)

        if not self.runable:
            # The worker stopped by itself (queue timeout) while we waited.
            return

        if worker.is_alive():
            self.queue.put("X")  # stop signal understood by _run_single
        worker.join()
        self.runable = False
        if hasattr(self, 'QM'):
            self.QM.shutdown()

    def _run_last(self):
        """Flush the batches collected by the previous worker pass."""
        if self.l_list:
            self._real_insert_asyn(self.l_list)
            self.l_list = []

        if self.u_list:
            for ob in self.u_list:
                self._real_update(ob)
            self.u_list = []

    def _get_size(self):
        """Return how many entries the next batch should take (at least 1).

        The result is the queue size capped at ``self.lsize``.  It never
        drops below 1 so that the worker blocks on ``queue.get`` instead of
        spinning when the queue is empty.
        """
        return max(1, min(self.queue.qsize(), self.lsize))

    def _run_single(self):
        """Worker loop: collect a batch from the queue, then flush it.

        Queue entries are either the string ``"X"`` (stop) or a list
        ``[collection, mark, ob]`` with ``mark`` being ``"insert"`` or
        ``"update"``.  Other strings are ignored.  A ``queue.get`` timeout
        also stops the loop.

        Raises:
            Exception: for an entry that is neither a string nor a list.
        """
        while self.runable:
            self._run_last()

            for i in range(self._get_size()):
                try:
                    item = self.queue.get(timeout=self.timeout)
                except Empty:
                    self.runable = False
                    break

                if isinstance(item, str):
                    if item == "X":
                        self.runable = False
                        break
                    continue

                if not isinstance(item, list):
                    # %r instead of string concatenation: this branch is only
                    # reached for non-string entries.
                    raise Exception("Error Queue message:\t%r" % (item,))

                collection, mark, ob = item

                if i == 0:
                    # The first entry of a pass decides the batch collection.
                    self.asyn_collection = collection

                if self.asyn_collection == collection:
                    if mark == "insert":
                        self.l_list.append(self._get_dict(ob))
                    elif mark == "update":
                        self.u_list.append(ob)
                else:
                    # Different collection: push it back for the next pass.
                    self.queue.put_left(item)
                    break

        # Flush whatever was collected before the stop signal or timeout.
        self._run_last()

    def _real_insert_asyn(self, l_list):
        """Bulk-insert the collected documents.

        NOTE(review): writes go to ``self.collection``, not to the
        collection carried in the queue entries (``self.asyn_collection``)
        -- confirm this is intended.
        """
        self.collection.insert_many(l_list)

    def _real_update(self, ob):
        """Apply the changes of ``ob`` relative to ``_origin`` to the store.

        Changed or new keys are sent as ``$set``, vanished keys as
        ``$unset``; nothing is sent when there is no difference.

        Raises:
            Exception: if ``ob`` has no ``_id`` or no ``_origin`` attribute.
        """
        if not hasattr(ob, "_id"):
            raise Exception("not a normal mongo item")
        if not hasattr(ob, "_origin"):
            raise Exception(
                "have no _origin data, can't use update_asyn callable")

        update_dict, remove_dict = self._get_update_and_remove_dict(ob)

        changes = {}
        if update_dict:
            changes["$set"] = update_dict
        if remove_dict:
            changes["$unset"] = remove_dict
        if changes:
            self.collection.update_one({"_id": ob._id}, changes)