Example #1
    def test_combine_nested_deep(self):
        np.random.seed(57454)
        list_of_nested = []
        for i in range(3):
            d = dict()
            d['a'] = i + 1
            d['b'] = [i + j for j in range(5)]
            d['c'] = {
                'c1': [j + 10 for j in range(5)],
                'c2': {
                    'c21': 10,
                    'c22': 20
                },
                'c3': 1
            }
            list_of_nested.append(d)
        list_of_nested_ = copy.deepcopy(list_of_nested)
        combine_fun = lambda x: np.sum(x)
        res = nest_utils.combine_nested(list_of_nested_, combine_fun)
        must = {
            'a': 6,
            'b': [3, 6, 9, 12, 15],
            'c': {
                'c1': [30, 33, 36, 39, 42],
                'c2': {
                    'c21': 30,
                    'c22': 60
                },
                'c3': 3
            }
        }
        self.assertDictEqual(res, must)
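
The expected must dict shows the contract this test exercises: combine_nested walks the nested structures leaf by leaf and applies combine_fun to the list of values gathered at each leaf. A minimal sketch of that semantics, written as a hypothetical re-implementation for illustration, not the library code:

import numpy as np

def combine_nested_sketch(list_of_nested, combine_fun):
    # Recurse through dicts and lists in parallel; at each leaf, apply
    # combine_fun to the values collected from every structure.
    first = list_of_nested[0]
    if isinstance(first, dict):
        return {key: combine_nested_sketch(
                    [each[key] for each in list_of_nested], combine_fun)
                for key in first}
    if isinstance(first, list):
        return [combine_nested_sketch(
                    [each[i] for each in list_of_nested], combine_fun)
                for i in range(len(first))]
    return combine_fun(list_of_nested)

# combine_nested_sketch(list_of_nested, np.sum) reproduces `must` above.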
Example #2
def combine_summary_from_devices(
        summary_devices: List[Dict[str, tf.Tensor]]) -> Dict[str, tf.Tensor]:
    """
    Combine (by averaging or selecting the first element) the summaries
    from multiple devices

    Parameters
    ----------
    summary_devices
        list of dicts with same structure from multiple devices

    Returns
    -------
    dict with the same structure as the first element in summary_devices;
    the combination method depends on the summary type - scalar and
    histogram values are averaged across devices, while for all other
    types the summary from the first device is taken
    """
    if all(s is None for s in summary_devices):
        return {}
    if len(summary_devices) == 1:
        return _dict_identity(summary_devices[0])
    combine_method = {
        'scalar': lambda x: tf.reduce_mean(x, axis=0),
        'histogram': lambda x: tf.reduce_mean(x, axis=0),
        'image': lambda x: tf.identity(x[0]),
        'text': lambda x: tf.identity(x[0]),
        'audio': lambda x: tf.identity(x[0]),
        'default': lambda x: tf.identity(x[0])
    }
    # any summary type missing from the mapping falls back to taking
    # the value from the first device
    combine_method_summary = defaultdict(lambda: lambda x: tf.identity(x[0]),
                                         combine_method)
    summary = nest_utils.combine_nested(summary_devices,
                                        combine_fun=combine_method_summary)
    return summary
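
In the code above, combine_method is keyed by summary type, and the defaultdict wrapper makes any type missing from the mapping fall back to the first device's value. The fallback pattern itself is plain Python; a standalone illustration with hypothetical values:

from collections import defaultdict

# Keys present in the base dict use their own function; unknown keys
# fall back to "take the first element".
combine = defaultdict(lambda: lambda x: x[0],
                      {'scalar': lambda x: sum(x) / len(x)})
print(combine['scalar']([1.0, 3.0]))   # 2.0 (averaged across devices)
print(combine['unknown']([1.0, 3.0]))  # 1.0 (first device only)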
Example #3
def combine_predictions_from_devices(
        predictions_devices: List[Dict[str, tf.Tensor]],
        predictions_have_variable_shape: bool = False) -> Dict[str, tf.Tensor]:
    """
    Combines (concatenates) the predictions from multiple devices

    Parameters
    ----------
    predictions_devices
        list of dicts with same structure from multiple devices
    predictions_have_variable_shape
        whether predictions from different devices may have different shapes;
        if so, sparse operations are used to combine them

    Returns
    -------
    dict with the same structure as the first element in predictions_devices,
    with values concatenated over the first (batch) dimension. If inputs
    have variable shapes, concatenation is done using
    :obj:`tf.sparse_concat` instead of :obj:`tf.concat`
    """
    if len(predictions_devices) == 1:
        return _dict_identity(predictions_devices[0])
    if predictions_have_variable_shape:
        combine_fun = lambda x: tf_ops.concat_padded(x, axis=0)
    else:
        combine_fun = lambda x: tf_ops.concat_or_stack(x, axis=0)
    with tf.variable_scope('combine_predictions'):
        predictions = nest_utils.combine_nested(predictions_devices,
                                                combine_fun=combine_fun)
    return predictions
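
tf_ops.concat_padded is not shown here. Assuming it pads the non-batch dimensions of each tensor to a common size before concatenating (an assumption; the docstring mentions sparse ops, so the real implementation may differ), a NumPy sketch of padded concatenation looks like this:

import numpy as np

def concat_padded_sketch(arrays, axis=0):
    # Zero-pad every dimension except `axis` to the largest size seen,
    # then concatenate along `axis`.
    max_shape = np.max([a.shape for a in arrays], axis=0)
    padded = []
    for a in arrays:
        pad = [(0, 0) if i == axis else (0, int(m) - s)
               for i, (s, m) in enumerate(zip(a.shape, max_shape))]
        padded.append(np.pad(a, pad))
    return np.concatenate(padded, axis=axis)

print(concat_padded_sketch([np.ones((2, 3)), np.ones((1, 5))]).shape)
# (3, 5)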
Example #4
    def test_combine_nested(self):
        np.random.seed(57454)
        list_of_nested = []
        for i in range(10):
            d = dict()
            d['a'] = np.random.randn(10, 5, 2)
            d['b'] = np.random.randn(100, 2)
            d['c'] = np.random.randn(10)
            d['e'] = np.random.randn(5, 6, 4)
            list_of_nested.append(d)

        combine_funs = [
            lambda x: np.concatenate(x, 0), lambda x: x[0],
            lambda x: np.mean(x, 0), {
                'a': lambda x: np.diff(x, 0),
                'b': lambda x: np.concatenate(np.argmax(x, 0), 0),
                'default': lambda x: x[0]
            }
        ]
        for combine_fun in combine_funs:
            list_of_nested_ = copy.deepcopy(list_of_nested)
            res = nest_utils.combine_nested(list_of_nested_, combine_fun)
            must = {}
            for k in list_of_nested_[0].keys():
                if isinstance(combine_fun, dict):
                    default = combine_fun['default']
                    combine_fun_ = combine_fun.get(k, default)
                else:
                    combine_fun_ = combine_fun
                must[k] = combine_fun_([l[k] for l in list_of_nested_])
            self.assertSetEqual(set(res.keys()),
                                set(list_of_nested_[0].keys()))
            self.assertAllClose(must, res)
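
The dict-valued entry in combine_funs exercises per-key combination: each top-level key uses its own function, with the 'default' entry as the fallback, exactly as the resolution loop above re-derives. Factored out as a helper, that resolution reads (a sketch mirroring the test logic, not the library's internals):

def resolve_combine_fun(combine_fun, key):
    # A dict-valued combine_fun dispatches per top-level key, falling
    # back to its 'default' entry; a plain callable applies to all keys.
    if isinstance(combine_fun, dict):
        return combine_fun.get(key, combine_fun['default'])
    return combine_fun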
Example #5
    def test_kpi_call(self, is_last_iteration):
        temp_dir = self.get_temp_dir()
        os.mkdir(os.path.join(temp_dir, "save"))
        os.mkdir(os.path.join(temp_dir, "cache"))

        cacher1 = KPIMD5Cacher().build()
        kpi_plugin = DummyTpFpTnFnKPIPlugin(name="kpi_plugin1",
                                            inbound_nodes=["dataset"],
                                            cachers=[cacher1]).build()

        saver2 = KPIJsonSaver().build()
        cacher2 = KPIMD5Cacher().build()
        kpi_accumulator1 = DummyF1KPIAccumulator(
            name="kpi_acc1",
            inbound_nodes=["kpi_plugin1", "dataset"],
            cachers=[cacher2],
            savers=[saver2]).build()

        saver3 = KPIJsonSaver().build()
        cacher3 = KPIMD5Cacher().build()
        kpi_accumulator2 = _MeanKPIAccumulator(
            name="kpi_acc2",
            inbound_nodes=["kpi_acc1", "dataset"],
            incoming_keys_mapping={
                "dataset": {
                    "evaluate": "_"
                }
            },
            cachers=[cacher3],
            savers=[saver3]).build()

        kpi_evaluator = KPIEvaluator(
            plugins=kpi_plugin,
            accumulators=[kpi_accumulator1, kpi_accumulator2]).build()
        kpi_evaluator.save_target = os.path.join(temp_dir, "save")
        kpi_evaluator.cache_target = os.path.join(temp_dir, "cache")
        dataset_nucleotide = Nucleotide(name="dataset").build()
        dataset_nucleotide.generated_keys = [
            "labels", "predictions", "evaluate", "prefix"
        ]
        incoming_nucleotides = {'dataset': dataset_nucleotide}
        kpi_evaluator.build_dna(incoming_nucleotides)

        data_batch = nest_utils.combine_nested(self.data, np.array)
        kpi_evaluator.is_last_iteration = is_last_iteration
        _ = kpi_evaluator(dataset=data_batch)

        if is_last_iteration:
            last_kpi_must = {
                "kpi_acc1": self.kpis1_must[-1],
                "kpi_acc2": self.kpis2_must[-1]
            }
        else:
            last_kpi_must = {"kpi_acc1": self.kpis1_must[3]}
        self.assertAllClose(last_kpi_must, kpi_evaluator.last_kpi)
Example #6
    def evaluate_on_batch(self,
                          method_to_evaluate: Callable,
                          sample_mask=None,
                          **inputs):
        """
        Call KPIEvaluator on the batch of data

        Parameters
        ----------
        method_to_evaluate
            method to call on the sample inputs
        sample_mask
            optional boolean mask over the batch indicating which samples
            should be evaluated
        inputs
            batch inputs to kpi evaluator

        Returns
        -------
        kpi
            calculated kpi
        """
        (batch_inputs_as_list, not_batch_inputs) = utils.split_batch_inputs(
            inputs, not_batch_keys=self.not_batch_keys)
        batch_size = len(batch_inputs_as_list)
        is_last_sample_batchwise = utils.get_is_last_sample_batchwise(
            batch_size, self.is_last_iteration, sample_mask)

        list_of_kpis = []
        for i_sample, each_sample_inputs in enumerate(batch_inputs_as_list):
            if sample_mask is None or sample_mask[i_sample]:
                self.is_last_sample = is_last_sample_batchwise[i_sample]
                kpi_sample = method_to_evaluate(**each_sample_inputs,
                                                **not_batch_inputs)
                if kpi_sample:
                    list_of_kpis.append(kpi_sample)

        if list_of_kpis:
            kpi = nest_utils.combine_nested(list_of_kpis, combine_fun=np.array)
        else:
            kpi = None
        return kpi
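
utils.split_batch_inputs is not shown here; from this call site it appears to turn a dict of batched inputs into a list of per-sample dicts, passing not_batch_keys through untouched. A minimal sketch under that assumption (hypothetical helper, not the real utility):

def split_batch_inputs_sketch(inputs, not_batch_keys=()):
    # Separate pass-through entries from batched ones, then slice the
    # batched entries into one dict per sample.
    not_batch = {k: v for k, v in inputs.items() if k in not_batch_keys}
    batch = {k: v for k, v in inputs.items() if k not in not_batch_keys}
    batch_size = len(next(iter(batch.values())))
    as_list = [{k: v[i] for k, v in batch.items()}
               for i in range(batch_size)]
    return as_list, not_batch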
Example #7
def combine_losses_from_devices(
        losses_devices: List[Dict[str, tf.Tensor]]) -> Dict[str, tf.Tensor]:
    """
    Combine (average) the losses from multiple devices

    Parameters
    ----------
    losses_devices
        list of dicts with same structure from multiple devices

    Returns
    -------
    dict with the same structure as the first element in losses_devices,
    where each value is the loss for that key averaged across devices
    """
    if len(losses_devices) == 1:
        return _dict_identity(losses_devices[0])
    with tf.variable_scope('combine_losses'):
        losses = nest_utils.combine_nested(
            losses_devices, combine_fun=lambda x: tf.reduce_mean(x, axis=0))
    return losses
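
As a quick NumPy analogue of the averaging above (hypothetical loss values; the real function works on tf.Tensor):

import numpy as np

losses_devices = [{'total': 0.3}, {'total': 0.5}]
# Average every key across the device dicts, as the TF code does with
# tf.reduce_mean over axis 0.
combined = {k: np.mean([d[k] for d in losses_devices])
            for k in losses_devices[0]}
print(combined['total'])  # 0.4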
Example #8
    def test_parse_tfrecord_example(self, tf_decode_raw,
                                    tf_parse_single_example):
        def _get_tfrecords_features():
            return {
                "data1": "feature_1",
                "data2": "feature_data2",
                "data3": ["feature_data3_0", "feature_data3_1"],
                "data4": {
                    "sub1": "feature_data4_sub1",
                    "sub2": "feature_data4_sub2"
                }
            }

        def _get_tfrecords_output_types():
            return {"data1": "string_value", "data4/sub1": "float_value"}

        def _parse_single_example(example, features):
            example_flat = nest_utils.flatten_nested_struct(example, "/")
            result = {
                k: "-".join([str(example_flat[k]), features[k]])
                for k in example_flat
            }
            return result

        def _postprocess_tfrecords(**data):
            data_flat = nest_utils.flatten_nested_struct(data)
            return nest_utils.unflatten_dict_to_nested(
                {k: v + "_pp"
                 for k, v in data_flat.items()})

        tf_decode_raw.side_effect = lambda x, y: x + "_raw"
        tf_parse_single_example.side_effect = _parse_single_example

        mixin = tf_data_utils.TfRecordsMixin()
        mixin.get_tfrecords_features = MagicMock(wraps=_get_tfrecords_features)
        mixin.get_tfrecords_output_types = MagicMock(
            wraps=_get_tfrecords_output_types)
        mixin.postprocess_tfrecords = MagicMock(wraps=_postprocess_tfrecords)
        mixin.decode_field = MagicMock(wraps=mixin.decode_field)
        result = mixin.parse_tfrecord_example(self.data)

        features = _get_tfrecords_features()
        features_flat = nest_utils.flatten_nested_struct(features, "/")
        data_flat = nest_utils.flatten_nested_struct(self.data, "/")
        output_types_flat = nest_utils.flatten_nested_struct(
            _get_tfrecords_output_types(), "/")
        result_must = nest_utils.unflatten_dict_to_nested(
            {
                k: "-".join([str(data_flat[k]), features_flat[k]]) +
                ("_raw_pp" if k in output_types_flat else "_pp")
                for k in features_flat
            }, "/")
        self.assertAllEqual(result_must, result)

        mixin.get_tfrecords_features.assert_called_once_with()
        mixin.get_tfrecords_output_types.assert_called_once_with()

        combine_fn_before_decode = lambda x: "-".join([str(x[0]), x[1]])
        decode_values = nest_utils.flatten_nested_struct(
            nest_utils.combine_nested([self.data, features],
                                      combine_fun=combine_fn_before_decode),
            "/")
        decode_field_calls = [
            mock_call(each_key, decode_values[each_key],
                      output_types_flat.get(each_key))
            for each_key in decode_values
        ]
        mixin.decode_field.assert_has_calls(decode_field_calls, any_order=True)

        data_to_postprocess_must = nest_utils.unflatten_dict_to_nested(
            {
                k: "-".join([str(data_flat[k]), features_flat[k]]) +
                ("_raw" if k in output_types_flat else "")
                for k in features_flat
            }, "/")
        mixin.postprocess_tfrecords.assert_called_once_with(
            **data_to_postprocess_must)
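
This test leans heavily on the flatten/unflatten pair. From its usage here, flatten_nested_struct joins nested keys with the given separator and unflatten_dict_to_nested inverts that. A small round-trip illustration with hypothetical data:

nested = {"data4": {"sub1": 1.0}, "data1": "s"}
flat = {"data4/sub1": 1.0, "data1": "s"}
# nest_utils.flatten_nested_struct(nested, "/") == flat
# nest_utils.unflatten_dict_to_nested(flat, "/") == nested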
Example #9
    def test_call(self, plugin_is_last_sample, sample_mask, is_last_iteration,
                  is_last_sample_must):
        temp_dir = self.get_temp_dir()
        os.mkdir(os.path.join(temp_dir, "save"))
        os.mkdir(os.path.join(temp_dir, "cache"))

        plugin_is_last_sample.side_effect = lambda x: x
        saver = KPIJsonSaver().build()
        cacher = KPIMD5Cacher().build()
        kpi_plugin = DummyTpFpTnFnKPIPlugin(cachers=[cacher],
                                            savers=[saver]).build()

        kpi_plugin.save_target = os.path.join(temp_dir, "save")
        kpi_plugin.cache_target = os.path.join(temp_dir, "cache")
        kpi_plugin.evaluate_on_sample = MagicMock(
            wraps=kpi_plugin.evaluate_on_sample)

        data_batch = nest_utils.combine_nested(self.data, combine_fun=np.stack)
        kpi_plugin.is_last_iteration = is_last_iteration

        kpi_must_list = []
        for i_sample, each_kpi_must in enumerate(self.kpi_must):
            if sample_mask is None or sample_mask[i_sample]:
                kpi_must_list.append(each_kpi_must)

        if kpi_must_list:
            kpi_must = nest_utils.combine_nested(kpi_must_list,
                                                 combine_fun=np.array)
        else:
            kpi_must = None

        is_last_sample_calls_must = [mock_call(i) for i in is_last_sample_must]
        if sample_mask is None:
            evaluate_on_sample_args_must = [
                mock_call(**each_sample_data) for each_sample_data in self.data
            ]
        else:
            evaluate_on_sample_args_must = [
                mock_call(**each_sample_data)
                for i, each_sample_data in enumerate(self.data)
                if sample_mask[i]
            ]

        kpi = kpi_plugin(sample_mask=sample_mask, **data_batch)

        plugin_is_last_sample.assert_has_calls(is_last_sample_calls_must)
        kpi_plugin.evaluate_on_sample.assert_has_calls(
            evaluate_on_sample_args_must)

        if kpi_must is None:
            self.assertIsNone(kpi)
            return

        if sample_mask is None:
            self.assertAllClose(kpi_must, kpi)
        else:
            for i in range(sum(sample_mask)):
                sample_kpi_must = {k: v[i] for k, v in kpi_must.items()}
                sample_kpi = {k: v[i] for k, v in kpi.items()}
                if sample_mask[i]:
                    self.assertAllClose(sample_kpi_must, sample_kpi)
                else:
                    self.assertAllEqual(sample_kpi_must, sample_kpi)