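# Shared imports assumed by the snippets below. The openfl module paths are
# best-guess assumptions for the codebase these examples were taken from;
# adjust them to match your installed version.
from logging import getLogger
from unittest import mock

import numpy as np
import pytest

from openfl.pipelines import NoCompressionPipeline  # assumed path
from openfl.utilities import TensorKey              # assumed path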
def test_decompress_no_metadata(tensor_key, named_tensor):
    """Test that decompress raises exception without metadata."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    metadata = []
    with pytest.raises(AssertionError):
        tensor_codec.decompress(
            tensor_key, named_tensor.data_bytes, metadata
        )
def test_find_dependencies_without_send_model_deltas(tensor_key):
    """Test that find_dependencies returns empty list when send_model_deltas = False."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, 5, report, ('model',)
    )
    tensor_key_dependencies = tensor_codec.find_dependencies(tensor_key, False)

    assert len(tensor_key_dependencies) == 0
def test_find_dependencies_with_zero_round(tensor_key):
    """Test that find_dependencies returns empty list when round number is 0."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('model',)
    )
    tensor_key_dependencies = tensor_codec.find_dependencies(tensor_key, True)

    assert len(tensor_key_dependencies) == 0
def test_decompress_no_tags(tensor_key, named_tensor):
    """Test that decompress raises exception without tags."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    with pytest.raises(AssertionError):
        tensor_codec.decompress(
            tensor_key, named_tensor.data_bytes, metadata
        )
def test_decompress_require_lossless_no_compressed_in_tags(tensor_key, named_tensor):
    """Test that decompress raises error when require_lossless is True and is no compressed tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('lossy_compressed',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    with pytest.raises(AssertionError):
        tensor_codec.decompress(
            tensor_key, named_tensor.data_bytes, metadata, require_lossless=True
        )
def test_find_dependencies(tensor_key):
    """Test that find_dependencies works correctly."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    round_number = 2
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('model',)
    )
    tensor_key_dependencies = tensor_codec.find_dependencies(tensor_key, True)

    assert len(tensor_key_dependencies) == 2
    tensor_key_dependency_0, tensor_key_dependency_1 = tensor_key_dependencies
    assert tensor_key_dependency_0.round_number == round_number - 1
    assert tensor_key_dependency_0.tags == tensor_key.tags
    assert tensor_key_dependency_1.tags == ('aggregated', 'delta', 'compressed')
def test_decompress_compressed_in_tags(tensor_key, named_tensor):
    """Test that decompress works correctly when there is compressed tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('compressed',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    decompressed_tensor_key, decompressed_nparray = tensor_codec.decompress(
        tensor_key, named_tensor.data_bytes, metadata
    )
    assert 'compressed' not in decompressed_tensor_key.tags
def test_generate(tensor_key, named_tensor):
    """Test that generate_delta works correctly."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)

    nparray = np.reshape(flat_array, newshape=array_shape, order='C')

    delta_tensor_key, delta_nparray = tensor_codec.generate_delta(
        tensor_key, nparray, nparray)

    assert np.array_equal(delta_nparray, nparray - nparray)
    assert 'delta' in delta_tensor_key.tags
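# The tensor_key and named_tensor fixtures used by the tests above are not
# shown in this listing. Below is a minimal stand-in sketch: it only mimics
# the attributes the tests actually touch (data_bytes, and
# transformer_metadata entries with int_to_float/int_list/bool_list fields),
# not the real protobuf definitions.
from types import SimpleNamespace


@pytest.fixture
def tensor_key():
    """A plain TensorKey: (tensor_name, origin, round_number, report, tags)."""
    return TensorKey('conv1', 'col_1', 0, False, ('trained',))


@pytest.fixture
def named_tensor():
    """Stand-in for a NamedTensor proto holding a float32 array."""
    nparray = np.arange(12, dtype=np.float32)
    metadata = SimpleNamespace(
        int_to_float={},   # lookup table used by some lossy pipelines
        int_list=[3, 4],   # array shape, consumed via np.reshape in the tests
        bool_list=[],
    )
    return SimpleNamespace(
        data_bytes=nparray.tobytes(),
        transformer_metadata=[metadata],
    )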
    def __init__(self,
                 collaborator_name,
                 aggregator_uuid,
                 federation_uuid,
                 client,
                 task_runner,
                 tensor_pipe,
                 task_config,
                 opt_treatment=OptTreatment.RESET,
                 delta_updates=False,
                 db_store_rounds=1,
                 **kwargs):
        """Initialize."""
        self.single_col_cert_common_name = None

        if self.single_col_cert_common_name is None:
            self.single_col_cert_common_name = ''  # for protobuf compatibility
        # we would really want this as an object

        self.collaborator_name = collaborator_name
        self.aggregator_uuid = aggregator_uuid
        self.federation_uuid = federation_uuid

        self.tensor_pipe = tensor_pipe or NoCompressionPipeline()
        self.tensor_codec = TensorCodec(self.tensor_pipe)
        self.tensor_db = TensorDB()
        self.db_store_rounds = db_store_rounds

        self.task_runner = task_runner
        self.delta_updates = delta_updates

        self.client = client

        self.task_config = task_config

        self.logger = getLogger(__name__)

        # RESET/CONTINUE_LOCAL/CONTINUE_GLOBAL
        if isinstance(opt_treatment, OptTreatment):
            self.opt_treatment = opt_treatment
        elif hasattr(OptTreatment, opt_treatment):
            self.opt_treatment = OptTreatment[opt_treatment]
        else:
            self.logger.error("Unknown opt_treatment: %s." % opt_treatment)
            raise NotImplementedError(
                "Unknown opt_treatment: %s." % opt_treatment)

        self.task_runner.set_optimizer_treatment(self.opt_treatment.name)
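# The collaborator snippet above assumes an OptTreatment enum along these
# lines (a minimal sketch; the real definition lives in the collaborator
# module, and only the member names are confirmed by the comment above):
from enum import Enum


class OptTreatment(Enum):
    """How the collaborator treats optimizer state between rounds."""

    RESET = 1            # reinitialize the optimizer every round
    CONTINUE_LOCAL = 2   # keep the collaborator's own optimizer state
    CONTINUE_GLOBAL = 3  # adopt the globally aggregated optimizer state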
def test_decompress_call_compression_pipeline(tensor_key, named_tensor):
    """Test that decompress calls compression pipeline when there is no compressed tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('lossy_compressed',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    tensor_codec.compression_pipeline = mock.Mock()
    tensor_codec.decompress(
        tensor_key, named_tensor.data_bytes, metadata
    )
    tensor_codec.compression_pipeline.backward.assert_called_with(
        named_tensor.data_bytes, metadata)
def test_decompress_call_lossless_pipeline_with_require_lossless(tensor_key, named_tensor):
    """Test that decompress calls lossless pipeline when require_lossless is True."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('compressed',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    tensor_codec.lossless_pipeline = mock.Mock()
    tensor_codec.decompress(
        tensor_key, named_tensor.data_bytes, metadata, require_lossless=True
    )
    tensor_codec.lossless_pipeline.backward.assert_called_with(
        named_tensor.data_bytes, metadata)
def test_compress_lossless(tensor_key, named_tensor):
    """Test that compress works correctly with require_lossless flag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)

    nparray = np.reshape(flat_array, newshape=array_shape, order='C')
    compressed_tensor_key, compressed_nparray, metadata = tensor_codec.compress(
        tensor_key, nparray, require_lossless=True)

    assert 'compressed' in compressed_tensor_key.tags
    assert compressed_tensor_key.tensor_name == tensor_key.tensor_name
    assert compressed_tensor_key.origin == tensor_key.origin
    assert compressed_tensor_key.round_number == tensor_key.round_number
def test_generate_delta_assert_model_in_tags(tensor_key, named_tensor):
    """Test that generate_delta raises exception when there is model tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, origin, round_number, report, ('model',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)

    nparray = np.reshape(flat_array, newshape=array_shape, order='C')

    with pytest.raises(AssertionError):
        tensor_codec.generate_delta(tensor_key, nparray, nparray)
def test_apply_delta_agg(tensor_key, named_tensor):
    """Test that apply_delta works for aggregator tensor_key."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_name, origin, round_number, report, tags = tensor_key
    tensor_key = TensorKey(
        tensor_name, 'aggregator_1', round_number, report, ('delta',)
    )
    metadata = [{'int_to_float': proto.int_to_float,
                 'int_list': proto.int_list,
                 'bool_list': proto.bool_list
                 } for proto in named_tensor.transformer_metadata]
    array_shape = tuple(metadata[0]['int_list'])
    flat_array = np.frombuffer(named_tensor.data_bytes, dtype=np.float32)

    nparray = np.reshape(flat_array, newshape=array_shape, order='C')

    new_model_tensor_key, nparray_with_delta = tensor_codec.apply_delta(
        tensor_key, nparray, nparray)

    assert 'delta' not in new_model_tensor_key.tags
    assert np.array_equal(nparray_with_delta, nparray + nparray)
    def __init__(self,
                 aggregator_uuid,
                 federation_uuid,
                 authorized_cols,
                 init_state_path,
                 best_state_path,
                 last_state_path,
                 assigner,
                 rounds_to_train=256,
                 single_col_cert_common_name=None,
                 compression_pipeline=None,
                 db_store_rounds=1,
                 **kwargs):
        """Initialize."""
        self.round_number = 0
        self.single_col_cert_common_name = single_col_cert_common_name

        if self.single_col_cert_common_name is not None:
            self._log_big_warning()
        else:
            # FIXME: '' instead of None is just for protobuf compatibility.
            # Cleaner solution?
            self.single_col_cert_common_name = ''

        self.rounds_to_train = rounds_to_train

        # if the collaborator requests a delta, this value is set to true
        self.authorized_cols = authorized_cols
        self.uuid = aggregator_uuid
        self.federation_uuid = federation_uuid
        self.assigner = assigner
        self.quit_job_sent_to = []

        self.tensor_db = TensorDB()
        self.db_store_rounds = db_store_rounds
        self.compression_pipeline = compression_pipeline \
            or NoCompressionPipeline()
        self.tensor_codec = TensorCodec(self.compression_pipeline)
        self.logger = getLogger(__name__)

        self.init_state_path = init_state_path
        self.best_state_path = best_state_path
        self.last_state_path = last_state_path

        self.best_tensor_dict: dict = {}
        self.last_tensor_dict: dict = {}

        self.best_model_score = None
        self.model: ModelProto = utils.load_proto(self.init_state_path)

        self._load_initial_tensors()  # keys are TensorKeys

        self.log_dir = f'logs/{self.uuid}_{self.federation_uuid}'
        # TODO use native tensorboard
        # self.tb_writer = tb.SummaryWriter(self.log_dir, flush_secs = 10)

        self.collaborator_tensor_results = {}  # {TensorKey: nparray}

        # these enable getting all tensors for a task
        # {TaskResultKey: list of TensorKeys}
        self.collaborator_tasks_results = {}
        # {TaskResultKey: data_size}
        self.collaborator_task_weight = {}
class TensorCodec:
    """TensorCodec is responsible for the following.

    1. Tracking the compression/decompression related dependencies of a given tensor
    2. Acting as a TensorKey aware wrapper for the compression_pipeline functionality
    """
    def __init__(self, compression_pipeline):
        """Initialize."""
        self.compression_pipeline = compression_pipeline
        if self.compression_pipeline.is_lossy():
            self.lossless_pipeline = NoCompressionPipeline()
        else:
            self.lossless_pipeline = compression_pipeline

    def set_lossless_pipeline(self, lossless_pipeline):
        """Set lossless pipeline."""
        assert lossless_pipeline.is_lossy() is False, (
            "The provided pipeline is not lossless")
        self.lossless_pipeline = lossless_pipeline

    def compress(self, tensor_key, data, require_lossless=False, **kwargs):
        """
        Function-wrapper around the tensor_pipeline.forward function.

        It also keeps track of the tensorkeys associated with the compressed nparray

        Args:
            tensor_key:             TensorKey is provided to verify it should
                                    be compressed, and new TensorKeys returned
                                    will be derivatives of the existing
                                    tensor_name

            data:                   (uncompressed) numpy array associated with
                                    the tensor_key

            require_lossless:       boolean, whether the tensor requires
                                    lossless compression

        Returns:
            compressed_tensor_key:  Tensorkey corresponding to the compressed
                                    tensor

            compressed_nparray:     The compressed tensor

            metadata:               metadata associated with compressed tensor

        """
        if require_lossless:
            compressed_nparray, metadata = self.lossless_pipeline.forward(
                data, **kwargs)
        else:
            compressed_nparray, metadata = self.compression_pipeline.forward(
                data, **kwargs)
        # Define the compressed tensorkey that should be
        # returned ('trained.delta'->'trained.delta.lossy_compressed')
        tensor_name, origin, round_number, report, tags = tensor_key
        if not self.compression_pipeline.is_lossy() or require_lossless:
            new_tags = tuple(list(tags) + ['compressed'])
        else:
            new_tags = tuple(list(tags) + ['lossy_compressed'])
        compressed_tensor_key = TensorKey(tensor_name, origin, round_number,
                                          report, new_tags)
        return compressed_tensor_key, compressed_nparray, metadata

    def decompress(self,
                   tensor_key,
                   data,
                   transformer_metadata,
                   require_lossless=False,
                   **kwargs):
        """
        Function-wrapper around the tensor_pipeline.backward function.

        It also keeps track of the tensorkeys associated with the decompressed nparray

        Args:
            tensor_key:             TensorKey is provided to verify it should
                                    be decompressed, and new TensorKeys
                                    returned will be derivatives of the
                                    existing tensor_name

            data:                   (compressed) numpy array associated with
                                    the tensor_key

            transformer_metadata:   metadata associated with the compressed
                                    tensor

            require_lossless:       boolean, does data require lossless
                                    decompression

        Returns:
            decompressed_tensor_key:    Tensorkey corresponding to the
                                        decompressed tensor

            decompressed_nparray:       The decompressed tensor

        """
        tensor_name, origin, round_number, report, tags = tensor_key

        assert (len(transformer_metadata) >
                0), ('metadata must be included for decompression')
        assert (('compressed' in tags)
                or ('lossy_compressed'
                    in tags)), ("Cannot decompress an uncompressed tensor")
        if require_lossless:
            assert ('compressed'
                    in tags), ("Cannot losslessly decompress lossy tensor")

        if require_lossless or 'compressed' in tags:
            decompressed_nparray = self.lossless_pipeline.backward(
                data, transformer_metadata, **kwargs)
        else:
            decompressed_nparray = self.compression_pipeline.backward(
                data, transformer_metadata, **kwargs)
        # Define the decompressed tensorkey that should be returned
        if 'lossy_compressed' in tags:
            lc_idx = tags.index('lossy_compressed')
            new_tags = list(tags)
            new_tags[lc_idx] = 'lossy_decompressed'
            decompressed_tensor_key = TensorKey(tensor_name, origin,
                                                round_number, report,
                                                tuple(new_tags))
        elif 'compressed' in tags:
            # 'compressed' == lossless compression; no need for
            # compression related tag after decompression
            new_tags = list(tags)
            new_tags.remove('compressed')
            decompressed_tensor_key = TensorKey(tensor_name, origin,
                                                round_number, report,
                                                tuple(new_tags))
        else:
            raise NotImplementedError(
                "Decompression is only supported on compressed data")

        return decompressed_tensor_key, decompressed_nparray

    @staticmethod
    def generate_delta(tensor_key, nparray, base_model_nparray):
        """
        Create delta from the updated layer and base layer.

        Args:
            tensor_key:         This is the tensor_key associated with the
                                nparray.
                                Should have a tag of 'trained' or 'aggregated'

            nparray:            The nparray that corresponds to the tensorkey

            base_model_nparray: The base model tensor that will be subtracted
                                from the new weights

        Returns:
            delta_tensor_key:   Tensorkey that corresponds to the delta weight
                                array

            delta:              Difference between the provided tensors

        """
        tensor_name, origin, round_number, report, tags = tensor_key
        if not np.isscalar(nparray):
            assert nparray.shape == base_model_nparray.shape, (
                'Shape of updated layer ({}) is not equal to base '
                'layer shape of ({})'.format(nparray.shape,
                                             base_model_nparray.shape))
        assert 'model' not in tags, (
            'The tensorkey should be provided '
            'from the layer with new weights, not the base model')
        if isinstance(tags, str):
            new_tags = tuple([tags] + ['delta'])
        else:
            new_tags = tuple(list(tags) + ['delta'])
        delta_tensor_key = TensorKey(tensor_name, origin, round_number, report,
                                     new_tags)
        return delta_tensor_key, nparray - base_model_nparray

    @staticmethod
    def apply_delta(tensor_key, delta, base_model_nparray):
        """
        Add delta to the nparray.

        Args:
            tensor_key:             This is the tensor_key associated with the
                                    delta. Should have a tag of 'trained' or
                                    'aggregated'
            delta:                  Weight delta between the new model and
                                    old model
            base_model_nparray:     The nparray that corresponds to the prior
                                    weights

        Returns:
            new_model_tensor_key:   Latest model layer tensorkey
            new_model_nparray:      Latest layer weights

        """
        tensor_name, origin, round_number, report, tags = tensor_key
        if not np.isscalar(base_model_nparray):
            assert (delta.shape == base_model_nparray.shape), (
                'Shape of delta ({}) is not equal to shape of model'
                ' layer ({})'.format(delta.shape, base_model_nparray.shape))
        # assert 'model' in tags, (
        #     'The tensorkey should be provided from the base model')
        # Aggregator UUID has the prefix 'aggregator'
        if 'aggregator' in origin:
            tags = list(tags)
            tags.remove('delta')
            new_tags = tuple(tags)
            new_model_tensor_key = TensorKey(tensor_name, origin, round_number,
                                             report, new_tags)
        else:
            new_model_tensor_key = TensorKey(tensor_name, origin, round_number,
                                             report, ('model', ))

        return new_model_tensor_key, base_model_nparray + delta

    def find_dependencies(self, tensor_key, send_model_deltas):
        """Resolve the tensors required to do the specified operation."""
        tensor_key_dependencies = []

        tensor_name, origin, round_number, report, tags = tensor_key

        if 'model' in tags and send_model_deltas:
            if round_number >= 1:
                # The new model can be generated by previous model + delta
                tensor_key_dependencies.append(
                    TensorKey(tensor_name, origin, round_number - 1, report,
                              tags))
                if self.compression_pipeline.is_lossy():
                    new_tags = ('aggregated', 'delta', 'lossy_compressed')
                else:
                    new_tags = ('aggregated', 'delta', 'compressed')
                tensor_key_dependencies.append(
                    TensorKey(tensor_name, origin, round_number, report,
                              new_tags))

        return tensor_key_dependencies
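# A minimal usage sketch for the TensorCodec class above: losslessly compress
# a layer, then decompress it and observe the tag round-trip. Tensor names
# and origins here are illustrative only.
def _compression_round_trip():
    codec = TensorCodec(NoCompressionPipeline())
    key = TensorKey('conv1', 'aggregator_1', 0, False, ('trained',))
    weights = np.ones((3, 4), dtype=np.float32)

    # forward pass: ('trained',) -> ('trained', 'compressed')
    compressed_key, data_bytes, metadata = codec.compress(
        key, weights, require_lossless=True)
    assert 'compressed' in compressed_key.tags

    # backward pass: ('trained', 'compressed') -> ('trained',)
    decompressed_key, restored = codec.decompress(
        compressed_key, data_bytes, metadata)
    assert np.array_equal(restored, weights)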
def test_find_dependencies_without_model_in_tags(tensor_key):
    """Test that find_dependencies returns empty list when there is no model tag."""
    tensor_codec = TensorCodec(NoCompressionPipeline())
    tensor_key_dependencies = tensor_codec.find_dependencies(tensor_key, True)

    assert len(tensor_key_dependencies) == 0
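# Hypothetical end-to-end delta workflow combining the static generate_delta
# and apply_delta helpers (a sketch; tensor names and origins are made up):
def _delta_round_trip():
    base_key = TensorKey('conv1', 'aggregator_1', 2, False, ('trained',))
    old_weights = np.zeros((3, 4), dtype=np.float32)
    new_weights = np.ones((3, 4), dtype=np.float32)

    # delta = new - old; the returned key gains a 'delta' tag
    delta_key, delta = TensorCodec.generate_delta(
        base_key, new_weights, old_weights)
    assert 'delta' in delta_key.tags

    # adding the delta back on top of the old weights recovers the new ones;
    # an 'aggregator' origin strips the 'delta' tag from the returned key
    model_key, rebuilt = TensorCodec.apply_delta(delta_key, delta, old_weights)
    assert np.array_equal(rebuilt, new_weights)
    assert 'delta' not in model_key.tags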