def test_add_to_empty_file(self):
        """Writes one iteration report to a fresh accessor and reads it back.

        The accessor is rooted at a new temp directory. After writing the
        reports for iteration 0, reading all iteration reports must yield
        exactly one entry that equals the list that was written.
        """
        accessor = _ReportAccessor(self.get_temp_dir())
        foo_report = subnetwork.MaterializedReport(
            iteration_number=0,
            name="foo",
            hparams={"p1": 1, "p2": "hoo", "p3": True},
            attributes={"a1": 1, "a2": "aoo", "a3": True},
            metrics={"m1": 1, "m2": "moo", "m3": True},
            included_in_final_ensemble=True,
        )
        written_reports = [foo_report]

        accessor.write_iteration_report(
            iteration_number=0,
            materialized_reports=written_reports,
        )
        read_back = list(accessor.read_iteration_reports())

        # One iteration was written, so one iteration must come back.
        self.assertEqual(1, len(read_back))
        self.assertEqual(written_reports, read_back[0])
示例#2
0
  def test_write_iteration_report_encoding(self):
    """Tests GitHub issue #4: reports whose values are raw bytes.

    The same bytes value is stored under one hparam, one attribute and one
    metric. The test passes when the report can be written and exactly one
    iteration report is read back.
    """
    raw_value = b"\n\x83\x01\n;adanet/iteration_2/ensemble_2_layer_dnn/"
    accessor = _ReportAccessor(self.get_temp_dir())
    bytes_report = subnetwork.MaterializedReport(
        iteration_number=0,
        name="foo",
        hparams={"p2": raw_value},
        attributes={"a2": raw_value},
        metrics={"m2": raw_value},
        included_in_final_ensemble=True,
    )

    accessor.write_iteration_report(
        iteration_number=0,
        materialized_reports=[bytes_report],
    )

    read_back = list(accessor.read_iteration_reports())
    self.assertEqual(1, len(read_back))
示例#3
0
def _iteration_report_pb_to_subnetwork_reports(iteration_report_pb):
    """Builds a list of `MaterializedReport`s from an IterationReport proto.

    Args:
      iteration_report_pb: proto providing `iteration_number` and an iterable
        `subnetwork_reports` field.

    Returns:
      A list holding one `subnetwork.MaterializedReport` for each entry of
      `iteration_report_pb.subnetwork_reports`, in iteration order.

    Raises:
      ValueError: if an entry of a report's hparams, attributes or metrics
        map has none of the supported value fields set.
    """
    # Value fields are probed in this order; the first one that is set wins.
    value_field_names = (
        "int_value",
        "float_value",
        "bytes_value",
        "string_value",
        "bool_value",
    )

    def _proto_map_to_dict(proto, field_name):
        """Copies the map field `field_name` of `proto` into a plain dict.

        Args:
          proto: the proto to read from.
          field_name: name of the map field in the proto.

        Returns:
          Dict with the keys and values in proto.field_name.

        Raises:
          ValueError: if proto.field_name has a value that's not an
            int_value, float_value, bytes_value, string_value, or bool_value.
        """
        proto_map = getattr(proto, field_name)
        converted = {}
        for key in proto_map:
            entry = proto_map[key]
            for value_field in value_field_names:
                if entry.HasField(value_field):
                    converted[key] = getattr(entry, value_field)
                    break
            else:
                # No supported value field was set on this entry.
                raise ValueError(
                    "{} map in subnetwork_report_pb has invalid field. "
                    "key: {} value: {} type: {}".format(
                        field_name, key, entry, type(entry)))
        return converted

    reports = []
    for report_pb in iteration_report_pb.subnetwork_reports:
        reports.append(
            subnetwork.MaterializedReport(
                iteration_number=iteration_report_pb.iteration_number,
                name=report_pb.name,
                hparams=_proto_map_to_dict(proto=report_pb,
                                           field_name="hparams"),
                attributes=_proto_map_to_dict(proto=report_pb,
                                              field_name="attributes"),
                metrics=_proto_map_to_dict(proto=report_pb,
                                           field_name="metrics"),
                included_in_final_ensemble=(
                    report_pb.included_in_final_ensemble)))
    return reports
示例#4
0
 def test_value_error(self, hparams=None, attributes=None, metrics=None):
     """Checks that writing the given report contents raises ValueError.

     Args:
       hparams: dict to use as the report's hparams; None means empty dict.
       attributes: dict to use as the report's attributes; None means empty
         dict.
       metrics: dict to use as the report's metrics; None means empty dict.
     """
     accessor = _ReportAccessor(self.get_temp_dir())
     report = subnetwork.MaterializedReport(
         iteration_number=0,
         name="foo",
         hparams={} if hparams is None else hparams,
         attributes={} if attributes is None else attributes,
         metrics={} if metrics is None else metrics,
         included_in_final_ensemble=True,
     )
     with self.assertRaises(ValueError):
         accessor.write_iteration_report(
             iteration_number=0,
             materialized_reports=[report],
         )
示例#5
0
    def materialize_subnetwork_reports(self, sess, iteration_number,
                                       subnetwork_reports,
                                       included_subnetwork_names):
        """Evaluates the Tensors in subnetwork_reports using sess.

        Every metric update op is run repeatedly, for `self.steps` steps or,
        when `self.steps` is None, until the input raises OutOfRangeError.
        The attribute and metric value Tensors are then fetched once, values
        exposing `.item()` are unwrapped, and the results are packaged into
        `adanet.subnetwork.MaterializedReport`s.

        Args:
          sess: `Session` instance with most recent variable values loaded.
          iteration_number: Integer iteration number.
          subnetwork_reports: Dict mapping string names to
            `subnetwork.Report` objects to be materialized.
          included_subnetwork_names: List of string names of the
            `subnetwork.Report`s that are included in the final ensemble.

        Returns:
          List of `adanet.subnetwork.MaterializedReport` objects.
        """

        def _unwrap(value):
            # Values exposing `.item()` are unwrapped through it; anything
            # else is passed through unchanged.
            return value.item() if hasattr(value, "item") else value

        # Each metric is a (value Tensor, update op) pair. Gather every
        # update op so they can all be run together at each step.
        update_ops = [
            metric_pair[1]
            for report in subnetwork_reports.values()
            for metric_pair in report.metrics.values()
        ]

        # Per subnetwork, the structure of Tensors to fetch after the
        # update ops have been run.
        fetches = {
            name: {
                "attributes": report.attributes,
                "metrics": {
                    metric_key: metric_pair[0]
                    for metric_key, metric_pair in report.metrics.items()
                },
            }
            for name, report in subnetwork_reports.items()
        }

        # Log about ten progress messages when the step count is known.
        if self.steps is None:
            log_every = 1000
        else:
            log_every = (1 if self.steps < 10
                         else math.floor(self.steps / 10.))

        completed = 0
        while self.steps is None or completed != self.steps:
            try:
                completed += 1
                if completed % log_every == 0 or self.steps == completed:
                    tf.logging.info("Report materialization [%d/%s]",
                                    completed, self.steps or "??")

                sess.run(update_ops)
            except tf.errors.OutOfRangeError:
                tf.logging.info(
                    "Encountered end of input during report materialization")
                break

        fetched = sess.run(fetches)
        tf.logging.info("Materialized subnetwork_reports.")

        # hparams are copied from the input report unchanged; only
        # attributes and metrics come from the session.
        return [
            subnetwork.MaterializedReport(
                iteration_number=iteration_number,
                name=name,
                hparams=subnetwork_reports[name].hparams,
                attributes={
                    key: _unwrap(value)
                    for key, value in values["attributes"].items()
                },
                metrics={
                    key: _unwrap(value)
                    for key, value in values["metrics"].items()
                },
                included_in_final_ensemble=(
                    name in included_subnetwork_names))
            for name, values in fetched.items()
        ]
示例#6
0
class ReportMaterializerTest(parameterized.TestCase, tf.test.TestCase):

    # pylint: disable=g-long-lambda
    @parameterized.named_parameters(
        {
            "testcase_name":
            "one_empty_subnetwork",
            "input_fn":
            tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo": subnetwork.Report(hparams={}, attributes={}, metrics={}
                                         ),
            },
            "steps":
            3,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name":
            "one_subnetwork",
            "input_fn":
            tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo":
                subnetwork.Report(
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": tf.constant(3.14),
                        "foo": tf.constant("bar"),
                        "parameters": tf.constant(7777),
                        "boo": tf.constant(True),
                    },
                    metrics={},
                ),
            },
            "steps":
            3,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": 3.14,
                        "foo": "bar",
                        "parameters": 7777,
                        "boo": True,
                    },
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name":
            "one_subnetwork_iteration_2",
            "input_fn":
            tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo":
                subnetwork.Report(
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": tf.constant(3.14),
                        "foo": tf.constant("bar"),
                        "parameters": tf.constant(7777),
                        "boo": tf.constant(True),
                    },
                    metrics={},
                ),
            },
            "steps":
            3,
            "iteration_number":
            2,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=2,
                    name="foo",
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": 3.14,
                        "foo": "bar",
                        "parameters": 7777,
                        "boo": True,
                    },
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name":
            "two_subnetworks",
            "input_fn":
            tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo1":
                subnetwork.Report(
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": tf.constant(3.14),
                        "foo": tf.constant("bar"),
                        "parameters": tf.constant(7777),
                        "boo": tf.constant(True),
                    },
                    metrics={},
                ),
                "foo2":
                subnetwork.Report(
                    hparams={
                        "learning_rate": 1.e-6,
                        "optimizer": "sgd",
                        "num_layers": 1,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": tf.constant(3.1445),
                        "foo": tf.constant("baz"),
                        "parameters": tf.constant(7788),
                        "boo": tf.constant(True),
                    },
                    metrics={},
                ),
            },
            "steps":
            3,
            "included_subnetwork_names": ["foo2"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo1",
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": 3.14,
                        "foo": "bar",
                        "parameters": 7777,
                        "boo": True,
                    },
                    metrics={},
                    included_in_final_ensemble=False,
                ),
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo2",
                    hparams={
                        "learning_rate": 1.e-6,
                        "optimizer": "sgd",
                        "num_layers": 1,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": 3.1445,
                        "foo": "baz",
                        "parameters": 7788,
                        "boo": True,
                    },
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name":
            "two_subnetworks_zero_included",
            "input_fn":
            tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo1": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={},
                ),
                "foo2": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={},
                ),
            },
            "steps":
            3,
            "included_subnetwork_names": [],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo1",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=False,
                ),
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo2",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=False,
                ),
            ],
        }, {
            "testcase_name":
            "two_subnetworks_both_included",
            "input_fn":
            tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo1": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={},
                ),
                "foo2": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={},
                ),
            },
            "steps":
            3,
            "included_subnetwork_names": ["foo1", "foo2"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo1",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=True,
                ),
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo2",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name":
            "materialize_metrics",
            "input_fn":
            tu.dummy_input_fn([[1., 1.], [1., 1.], [1., 1.]],
                              [[1.], [2.], [3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo":
                subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={"moo": tf.metrics.mean(labels)},
                ),
            },
            "steps":
            3,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={},
                    attributes={},
                    metrics={"moo": 2.},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name":
            "materialize_metrics_none_steps",
            "input_fn":
            tu.dataset_input_fn([[1., 1.], [1., 1.], [1., 1.]],
                                [[1.], [2.], [3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo":
                subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={"moo": tf.metrics.mean(labels)},
                ),
            },
            "steps":
            None,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={},
                    attributes={},
                    metrics={"moo": 2.},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name":
            "materialize_metrics_non_tensor_op",
            "input_fn":
            tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn":
            lambda features, labels: {
                "foo":
                subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={"moo": (tf.constant(42), tf.no_op())},
                ),
            },
            "steps":
            3,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={},
                    attributes={},
                    metrics={"moo": 42},
                    included_in_final_ensemble=True,
                ),
            ],
        })
    def test_materialize_subnetwork_reports(self,
                                            input_fn,
                                            subnetwork_reports_fn,
                                            steps,
                                            iteration_number=0,
                                            included_subnetwork_names=None,
                                            want_materialized_reports=None):
        """Materializes reports and compares them with the expected ones.

        Args:
          input_fn: callable returning a `(features, labels)` pair.
          subnetwork_reports_fn: callable mapping `(features, labels)` to a
            dict of `subnetwork.Report`s keyed by name.
          steps: value passed as `steps` to the `ReportMaterializer`.
          iteration_number: iteration number passed to the materializer and
            expected on every materialized report.
          included_subnetwork_names: names passed to the materializer as the
            subnetworks included in the final ensemble.
          want_materialized_reports: list of expected
            `subnetwork.MaterializedReport`s, matched to results by name.
        """

        def _assert_value(want, got):
            # Floats are compared approximately; everything else exactly.
            if isinstance(want, float):
                self.assertAllClose(want, got)
            else:
                self.assertEqual(want, got)

        tf.constant(0.)  # dummy op so that the session graph is never empty.
        features, labels = input_fn()
        subnetwork_reports = subnetwork_reports_fn(features, labels)
        with self.test_session() as sess:
            sess.run(tf.initializers.local_variables())
            report_materializer = ReportMaterializer(input_fn=input_fn,
                                                     steps=steps)
            materialized_reports = (
                report_materializer.materialize_subnetwork_reports(
                    sess, iteration_number, subnetwork_reports,
                    included_subnetwork_names))
            self.assertEqual(len(want_materialized_reports),
                             len(materialized_reports))
            got_by_name = {
                report.name: report
                for report in materialized_reports
            }
            for want in want_materialized_reports:
                got = got_by_name[want.name]
                self.assertEqual(iteration_number, got.iteration_number)
                # Several parameterized cases differ only in which
                # subnetworks are included, so this flag must be checked.
                self.assertEqual(want.included_in_final_ensemble,
                                 got.included_in_final_ensemble)

                # hparams are compared as-is, without decoding.
                self.assertEqual(set(want.hparams.keys()),
                                 set(got.hparams.keys()))
                for key, want_value in want.hparams.items():
                    _assert_value(want_value, got.hparams[key])

                # Attribute and metric values go through `decode` before
                # they are compared.
                self.assertSetEqual(set(want.attributes.keys()),
                                    set(got.attributes.keys()))
                for key, want_value in want.attributes.items():
                    _assert_value(want_value, decode(got.attributes[key]))

                self.assertSetEqual(set(want.metrics.keys()),
                                    set(got.metrics.keys()))
                for key, want_value in want.metrics.items():
                    _assert_value(want_value, decode(got.metrics[key]))
    def test_add_to_existing_file(self):
        """Writes three iterations in sequence and reads them all back.

        Each iteration has two reports, "foo1" and "foo2". The reports read
        back must equal the lists written, in iteration order.
        """

        def _make_report(iteration, name, marker, included):
            # `marker` is the integer stored under p1 / a1 / m1; all other
            # values are the same for every report.
            return subnetwork.MaterializedReport(
                iteration_number=iteration,
                name=name,
                hparams={
                    "p1": marker,
                    "p2": "hoo",
                    "p3": True,
                },
                attributes={
                    "a1": marker,
                    "a2": "aoo",
                    "a3": True,
                },
                metrics={
                    "m1": marker,
                    "m2": "moo",
                    "m3": True,
                },
                included_in_final_ensemble=included,
            )

        # One row per iteration: (name, marker, included_in_final_ensemble).
        report_specs = [
            [("foo1", 11, False), ("foo2", 12, True)],
            [("foo1", 21, True), ("foo2", 22, False)],
            [("foo1", 31, False), ("foo2", 32, True)],
        ]
        materialized_reports = [
            [
                _make_report(iteration, name, marker, included)
                for name, marker, included in specs
            ]
            for iteration, specs in enumerate(report_specs)
        ]

        report_accessor = _ReportAccessor(self.get_temp_dir())

        for iteration, reports in enumerate(materialized_reports):
            report_accessor.write_iteration_report(iteration, reports)

        actual_reports = list(report_accessor.read_iteration_reports())
        self.assertEqual(materialized_reports, actual_reports)