示例#1
0
 def resize_to_new_table(self, new_size_per_shard, new_dir_name):
     self.increment_rpc_count_by(n=2)
     assert not FileUtil.does_dir_exist(dir_name=new_dir_name) or FileUtil.is_dir_empty(dir_name=new_dir_name)
     new_sptable_storage = ShardedProtoTableStorage(size_per_shard=new_size_per_shard)
     new_sptable_storage.initialize_from_dir(dir_name=new_dir_name)
     for shard in range(self.get_num_shards()):
         related_proto_file = self._shard_to_file(shard=shard)
         proto_table = ProtoTableStorage()
         proto_table.initialize_from_file(file_name=related_proto_file)
         new_sptable_storage.write(data=proto_table.read_all())
         self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
     return new_sptable_storage
示例#2
0
    def write(self, data, params=None):
        if not params:
            params = {}
        if 'overwrite' not in params:
            params['overwrite'] = True
        assert isinstance(data, dict)

        exising_shard_to_data_map = defaultdict(dict)
        new_data = {}
        for key, val in data.items():
            if key in self._index_map.index_map:
                exising_shard_to_data_map[self._index_map.index_map[key]][key] = val
            else:
                new_data[key] = val

        try:
            for shard, existing_data in exising_shard_to_data_map.items():
                related_proto_file = self._shard_to_file(shard=shard)
                proto_table = ProtoTableStorage()
                proto_table.initialize_from_file(file_name=related_proto_file)
                proto_table.write(data=existing_data, params=params)
                self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
            if new_data:
                all_new_keys = list(new_data.keys())
                latest_shard = self.get_latest_shard()
                latest_proto_file = self._shard_to_file(shard=latest_shard)
                proto_table = ProtoTableStorage()
                proto_table.initialize_from_file(file_name=latest_proto_file)
                latest_proto_table_size = proto_table.get_num_entries()
                proto_table.write(
                    data={key: new_data[key] for key in all_new_keys[:self._size_per_shard - latest_proto_table_size]},
                    params=params
                )
                self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
                for key in all_new_keys[:self._size_per_shard - latest_proto_table_size]:
                    self._index_map.index_map[key] = latest_shard

                if len(all_new_keys) > self._size_per_shard - latest_proto_table_size:
                    remaining_new_keys = all_new_keys[self._size_per_shard - latest_proto_table_size:]
                    start_index = 0
                    while start_index < len(remaining_new_keys):
                        data_for_new_shard = remaining_new_keys[start_index:start_index + self._size_per_shard]
                        latest_shard += 1
                        self._index_map.cur_shard += 1
                        proto_file = self._shard_to_file(shard=latest_shard)
                        self._logger.info("Write to new file with name [" + proto_file + '] and shard [' +
                                          str(latest_shard) + '].')
                        proto_table = ProtoTableStorage()
                        proto_table.initialize_from_file(file_name=proto_file)
                        proto_table.write(
                            data={key: new_data[key] for key in data_for_new_shard},
                            params=params
                        )
                        self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
                        for key in data_for_new_shard:
                            self._index_map.index_map[key] = latest_shard

                        start_index += self._size_per_shard
                self._logger.info("Writing the index map to [" + self._index_map_file + '].')
                self.increment_rpc_count_by(n=1)
                FileUtil.write_proto_to_file(
                    proto=self._index_map,
                    file_name=self._index_map_file
                )

        except Exception as err:
            self._SYS_LOGGER.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
            self._logger.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
            raise StorageWriteException("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')