示例#1
0
 def add_key(self, key, key_infos):
     '''
     Append a (key, key_infos) record to this bucket's file.

     The group id and the bucket file name are reported at debug level
     before the record is written with dump_key_data.
     '''
     message = "Append key to bucket: group_id: {0}, bucket: {1}".format(
         self.group_id, self.bucket_file.name)
     log.debug(message)
     dump_key_data((key, key_infos), self.bucket_file)
示例#2
0
 def move_to_rest_bucket(self, key, key_infos):
     '''
     Write a (key, key_infos) record to the 'rest_keys' bucket file.

     The key is reported at info level first; the record then goes to
     self.ctx.rest_file via dump_key_data.
     '''
     message = "Moving key to rest keys bucket: {0}".format(key)
     log.info(message)
     dump_key_data((key, key_infos), self.ctx.rest_file)
示例#3
0
 def move_to_rest_bucket(self, key, key_infos):
     '''
     Store the key together with its infos in the 'rest_keys' bucket.

     Logs the key at info level, then serializes the (key, key_infos)
     pair into the context's rest file.
     '''
     log.info(
         "Moving key to rest keys bucket: {0}".format(key)
     )
     record = (key, key_infos)
     rest_file = self.ctx.rest_file
     dump_key_data(record, rest_file)
示例#4
0
 def add_key(self, key, key_infos):
     '''
     Store the key together with its infos in the bucket file.

     Emits a debug message naming the group id and the bucket file, then
     serializes the (key, key_infos) pair into self.bucket_file.
     '''
     log.debug(
         "Append key to bucket: group_id: {0}, bucket: {1}".format(self.group_id, self.bucket_file.name)
     )
     record = (key, key_infos)
     dump_key_data(record, self.bucket_file)
示例#5
0
    def sort_by_physical_order(self):
        '''
        Sorts the records of the bucket file by physical position, in place.

        Works as an external merge sort:
          phase 1: records are read from the bucket file and accumulated in
                   memory; every time their estimated size exceeds the limit
                   the chunk is sorted and dumped to its own file by
                   _sort_chunk;
          phase 2: the sorted chunk files are merged through a heap and the
                   result is written back to the truncated bucket file.

        Chunk files are closed and removed on exit, whether the sort
        succeeded or not.
        '''
        chunk_files = []
        try:
            log.info("Sort bucket: phase 1: sort chunks: group_id: {0}, bucket: {1}"
                     .format(self.group_id, self.bucket_file.name))
            max_chunks_size = 500 * 1024 * 1024  # 500 Mb

            chunk = []
            chunk_size = 0

            self.bucket_file.seek(0)
            for key_data in load_key_data_from_file(self.bucket_file):
                # sys.getsizeof is shallow, so the record, its key, the infos
                # container and every single info are counted separately.
                chunk_size += sys.getsizeof(key_data) + sys.getsizeof(key_data[0]) + sys.getsizeof(key_data[1]) + \
                              sum(sys.getsizeof(info) for info in key_data[1])
                chunk.append((self._get_physical_position(key_data), key_data))
                if chunk_size > max_chunks_size:
                    chunk_size = 0
                    # _sort_chunk empties `chunk` after writing it out
                    self._sort_chunk(chunk, chunk_files)
            if chunk:
                self._sort_chunk(chunk, chunk_files)

            log.info("Sort bucket: phase 2: merge chunks: group_id: {0}, bucket: {1}"
                     .format(self.group_id, self.bucket_file.name))

            class MergeDataWrapper(object):
                '''
                Head record of one sorted chunk, ordered by physical position.
                '''
                def __init__(self, gen, outer):
                    self.gen = gen
                    self.outer = outer
                    self.key_data = None
                    self.physical_position = None
                    # False when the chunk has no records at all
                    self.has_data = self.advance()

                def __lt__(self, other):
                    # heapq only needs "<"; unlike __cmp__/cmp() this works
                    # on both Python 2 and Python 3.
                    return self.physical_position < other.physical_position

                def advance(self):
                    '''
                    Loads the next record; returns False when the chunk is exhausted.
                    '''
                    try:
                        # builtin next() instead of the Python 2 only gen.next()
                        self.key_data = next(self.gen)
                    except StopIteration:
                        return False
                    self.physical_position = self.outer._get_physical_position(self.key_data)
                    return True

            heap = []
            for chunk_file in chunk_files:
                chunk_file.seek(0)
                wrapper = MergeDataWrapper(load_key_data_from_file(chunk_file), self)
                # an exhausted wrapper has nothing to compare or dump
                if wrapper.has_data:
                    heapq.heappush(heap, wrapper)

            self.bucket_file.seek(0)
            self.bucket_file.truncate()
            while heap:
                wrapper = heapq.heappop(heap)
                dump_key_data(wrapper.key_data, self.bucket_file)
                if wrapper.advance():
                    heapq.heappush(heap, wrapper)
        finally:
            for f in chunk_files:
                # close the handle before removing the file so descriptors are
                # not leaked; unlink even if close() fails
                try:
                    f.close()
                finally:
                    os.unlink(f.name)
示例#6
0
    def sort_by_physical_order(self):
        '''
        Rewrites the bucket file with its records ordered by physical position.

        The sort is done in two phases so the whole bucket never has to fit
        in memory:
          phase 1: records are read from the bucket file into an in-memory
                   chunk; when the estimated chunk size exceeds the limit
                   the chunk is sorted and written to a separate chunk file
                   by _sort_chunk;
          phase 2: the bucket file is truncated and the chunk files are
                   merged into it using a heap keyed by physical position.

        Every chunk file is closed and deleted when the method exits, even
        if an exception is raised.
        '''
        chunk_files = []
        try:
            log.info("Sort bucket: phase 1: sort chunks: group_id: {0}, bucket: {1}"
                     .format(self.group_id, self.bucket_file.name))
            max_chunks_size = 500 * 1024 * 1024  # 500 Mb

            chunk = []
            chunk_size = 0

            self.bucket_file.seek(0)
            for key_data in load_key_data_from_file(self.bucket_file):
                # Shallow sizes of the record, the key, the infos container
                # and each info are summed to estimate the memory footprint.
                chunk_size += sys.getsizeof(key_data) + sys.getsizeof(key_data[0]) + sys.getsizeof(key_data[1]) + \
                              sum(sys.getsizeof(info) for info in key_data[1])
                chunk.append((self._get_physical_position(key_data), key_data))
                if chunk_size > max_chunks_size:
                    chunk_size = 0
                    # the chunk list is cleared by _sort_chunk once dumped
                    self._sort_chunk(chunk, chunk_files)
            if chunk:
                self._sort_chunk(chunk, chunk_files)

            log.info("Sort bucket: phase 2: merge chunks: group_id: {0}, bucket: {1}"
                     .format(self.group_id, self.bucket_file.name))

            class MergeDataWrapper(object):
                '''
                Cursor over one sorted chunk; compares by the current record's
                physical position so it can live in a heap.
                '''
                def __init__(self, gen, outer):
                    self.gen = gen
                    self.outer = outer
                    self.key_data = None
                    self.physical_position = None
                    # remembers whether the chunk produced at least one record
                    self.has_data = self.advance()

                def __lt__(self, other):
                    # Rich comparison replaces __cmp__/cmp(), which do not
                    # exist on Python 3; heapq relies on "<" only.
                    return self.physical_position < other.physical_position

                def advance(self):
                    '''
                    Moves to the next record; returns False at the end of the chunk.
                    '''
                    try:
                        # next(gen) is portable, gen.next() is Python 2 only
                        self.key_data = next(self.gen)
                    except StopIteration:
                        return False
                    self.physical_position = self.outer._get_physical_position(self.key_data)
                    return True

            heap = []
            for chunk_file in chunk_files:
                chunk_file.seek(0)
                wrapper = MergeDataWrapper(load_key_data_from_file(chunk_file), self)
                # never push a cursor that has no current record
                if wrapper.has_data:
                    heapq.heappush(heap, wrapper)

            self.bucket_file.seek(0)
            self.bucket_file.truncate()
            while heap:
                wrapper = heapq.heappop(heap)
                dump_key_data(wrapper.key_data, self.bucket_file)
                if wrapper.advance():
                    heapq.heappush(heap, wrapper)
        finally:
            for f in chunk_files:
                # Release the file descriptor before deleting the file; the
                # unlink still runs if close() raises.
                try:
                    f.close()
                finally:
                    os.unlink(f.name)
示例#7
0
    def _sort_chunk(self, chunk, chunk_files):
        '''
        Sorts the chunk and dumps it to a new chunk file.

        chunk: list of (sort_key, key_data) pairs; sorted in place by the
            first element and emptied before returning.
        chunk_files: list of already created chunk files; the new file,
            named after the bucket file plus the current list length, is
            appended to it and left open.
        '''
        chunk_path = "{}.{}".format(self.bucket_file.name, len(chunk_files))
        sorted_file = open(chunk_path, 'wb+')
        # registered before any write so the caller always knows about the file
        chunk_files.append(sorted_file)

        chunk.sort(key=lambda entry: entry[0])

        for entry in chunk:
            dump_key_data(entry[1], sorted_file)

        # empty the caller's list in place so it can be reused
        chunk[:] = []
示例#8
0
    def _sort_chunk(self, chunk, chunk_files):
        '''
        Writes one in-memory chunk to its own file in sorted order.

        The chunk holds (sort_key, key_data) pairs. It is sorted in place by
        sort_key, each key_data is dumped to a freshly opened file whose
        name is the bucket file name suffixed with the number of existing
        chunk files, and finally the chunk list is cleared. The opened file
        is appended to chunk_files and is not closed here.
        '''
        index = len(chunk_files)
        target = open("{}.{}".format(self.bucket_file.name, index), 'wb+')
        chunk_files.append(target)

        def sort_key(pair):
            return pair[0]

        chunk.sort(key=sort_key)

        for pair in chunk:
            key_data = pair[1]
            dump_key_data(key_data, target)

        # drop all items while keeping the same list object
        del chunk[0:len(chunk)]