def test_write_iteration_report_encoding(self):
  """Writes bytes and non-ASCII text values, then reads the report back.

  Regression test for GitHub issue #4. Only the number of iteration reports
  read back is asserted, not their contents.
  """
  report_accessor = _ReportAccessor(self.get_temp_dir())
  raw_bytes = b"\n\x83\x01\n;adanet/iteration_2/ensemble_2_layer_dnn/"
  non_ascii_text = u"\U0001f937"
  report = subnetwork.MaterializedReport(
      iteration_number=0,
      name="foo",
      hparams={"p1": raw_bytes, "p2": non_ascii_text},
      attributes={"a1": raw_bytes, "a2": non_ascii_text},
      metrics={"m1": raw_bytes, "m2": non_ascii_text},
      included_in_final_ensemble=True,
  )

  report_accessor.write_iteration_report(
      iteration_number=0,
      materialized_reports=[report],
  )

  reports_read = list(report_accessor.read_iteration_reports())
  self.assertLen(reports_read, 1)
def _iteration_report_pb_to_subnetwork_reports(iteration_report_pb):
  """Builds a `MaterializedReport` list from an IterationReport proto.

  Args:
    iteration_report_pb: proto exposing `iteration_number` and an iterable
      `subnetwork_reports` field.

  Returns:
    List with one `subnetwork.MaterializedReport` per entry of
    `iteration_report_pb.subnetwork_reports`, in the same order.

  Raises:
    ValueError: if a value in the hparams, attributes, or metrics map has none
      of the supported value fields set.
  """
  # Value fields are probed in this order; the first one that is set is used.
  value_field_names = (
      "int_value",
      "float_value",
      "bytes_value",
      "string_value",
      "bool_value",
  )

  def _proto_map_to_dict(proto, field_name):
    """Copies the map field `field_name` of `proto` into a plain dict.

    Args:
      proto: the proto to read from.
      field_name: name of the map field in the proto.

    Returns:
      Dict with the keys and unwrapped values of proto.field_name.

    Raises:
      ValueError: if an entry has no int_value, float_value, bytes_value,
        string_value, or bool_value set.
    """
    proto_map = getattr(proto, field_name)
    result = {}
    for key in proto_map:
      entry = proto_map[key]
      for value_field_name in value_field_names:
        if entry.HasField(value_field_name):
          result[key] = getattr(entry, value_field_name)
          break
      else:
        # No supported value field is populated on this entry.
        raise ValueError("{} map in subnetwork_report_pb has invalid field. "
                         "key: {} value: {} type: {}".format(
                             field_name, key, entry, type(entry)))
    return result

  reports = []
  for subnetwork_report_pb in iteration_report_pb.subnetwork_reports:
    reports.append(
        subnetwork.MaterializedReport(
            iteration_number=iteration_report_pb.iteration_number,
            name=subnetwork_report_pb.name,
            hparams=_proto_map_to_dict(
                proto=subnetwork_report_pb, field_name="hparams"),
            attributes=_proto_map_to_dict(
                proto=subnetwork_report_pb, field_name="attributes"),
            metrics=_proto_map_to_dict(
                proto=subnetwork_report_pb, field_name="metrics"),
            included_in_final_ensemble=(
                subnetwork_report_pb.included_in_final_ensemble)))
  return reports
def _json_report_to_materialized_report(iteration_report_json):
  """Builds a `MaterializedReport` list from a JSON-loaded iteration report.

  Args:
    iteration_report_json: mapping with an "iteration_number" entry and a
      "subnetwork_reports" sequence whose items carry "name", "hparams",
      "attributes", "metrics", and "included_in_final_ensemble".

  Returns:
    List with one `subnetwork.MaterializedReport` per subnetwork report, in
    the same order as in the input.
  """
  # "iteration_number" is read once per report, so an input without any
  # subnetwork reports never touches that key.
  return [
      subnetwork.MaterializedReport(
          iteration_number=int(iteration_report_json["iteration_number"]),
          name=report_json["name"],
          hparams=report_json["hparams"],
          attributes=report_json["attributes"],
          metrics=report_json["metrics"],
          included_in_final_ensemble=report_json[
              "included_in_final_ensemble"])
      for report_json in iteration_report_json["subnetwork_reports"]
  ]
def test_add_to_empty_file(self):
  """Writes one iteration report to a fresh directory and reads it back.

  The report mixes int, native string, bytes, unicode, and bool values in
  each of hparams, attributes, and metrics.
  """
  report_accessor = _ReportAccessor(self.get_temp_dir())

  hparams = {
      "p1": 1,
      "p2": "default_hparam",
      "p3": b"binary_hparam",
      "p4": u"unicode_hparam",
      "p5": True,
  }
  attributes = {
      "a1": 1,
      "a2": "default_attribute",
      "a3": b"binary_attribute",
      "a4": u"unicode_attribute",
      "a5": True,
  }
  metrics = {
      "m1": 1,
      "m2": "default_metric",
      "m3": b"binary_metric",
      "m4": u"unicode_metric",
      "m5": True,
  }
  expected_reports = [
      subnetwork.MaterializedReport(
          iteration_number=0,
          name="foo",
          hparams=hparams,
          attributes=attributes,
          metrics=metrics,
          included_in_final_ensemble=True,
      ),
  ]

  report_accessor.write_iteration_report(
      iteration_number=0,
      materialized_reports=expected_reports,
  )

  reports_read = list(report_accessor.read_iteration_reports())
  self.assertLen(reports_read, 1)
  self.assertEqual(expected_reports, reports_read[0])
def test_value_error(self, hparams=None, attributes=None, metrics=None):
  """Checks that writing a report with the given values raises ValueError.

  Args:
    hparams: optional dict of hparams for the report; defaults to empty.
    attributes: optional dict of attributes for the report; defaults to empty.
    metrics: optional dict of metrics for the report; defaults to empty.
  """
  # NOTE(review): presumably driven by a parameterized decorator that is not
  # visible here and supplies the offending values -- confirm.
  hparams = {} if hparams is None else hparams
  attributes = {} if attributes is None else attributes
  metrics = {} if metrics is None else metrics

  report_accessor = _ReportAccessor(self.get_temp_dir())
  report = subnetwork.MaterializedReport(
      iteration_number=0,
      name="foo",
      hparams=hparams,
      attributes=attributes,
      metrics=metrics,
      included_in_final_ensemble=True,
  )

  # The report is built outside the context manager, so only the write itself
  # is expected to raise.
  with self.assertRaises(ValueError):
    report_accessor.write_iteration_report(
        iteration_number=0,
        materialized_reports=[report],
    )
def materialize_subnetwork_reports(self, sess, iteration_number,
                                   subnetwork_reports,
                                   included_subnetwork_names):
  """Evaluates the Tensors of each subnetwork report with `sess`.

  Runs the metric update ops for up to `self.steps` steps (or until the input
  is exhausted), fetches the attribute and metric Tensors, converts values
  that expose `.item()` into Python primitives, and wraps the results in
  `adanet.subnetwork.MaterializedReport` objects.

  Args:
    sess: `Session` instance with most recent variable values loaded.
    iteration_number: Integer iteration number.
    subnetwork_reports: Dict mapping string names to `subnetwork.Report`
      objects to be materialized.
    included_subnetwork_names: List of string names of the
      `subnetwork.Report`s that are included in the final ensemble.

  Returns:
    List of `adanet.subnetwork.MaterializedReport` objects.
  """

  def _to_python(value):
    """Unwraps values that expose `.item()`; returns anything else as is."""
    return value.item() if hasattr(value, "item") else value

  # Each metric is a (value Tensor, update op) pair. Collect every update op
  # so they can all be run together on each step.
  metric_update_ops = [
      metric[1]
      for report in subnetwork_reports.values()
      for metric in report.metrics.values()
  ]

  # Fetch structure for the final `sess.run`: per subnetwork, the attribute
  # Tensors and the value Tensor of each metric.
  fetches = {
      name: {
          "attributes": report.attributes,
          "metrics": {
              metric_key: metric[0]
              for metric_key, metric in report.metrics.items()
          },
      }
      for name, report in subnetwork_reports.items()
  }

  total_steps = self.steps
  if total_steps is None:
    logging_frequency = 1000
  elif total_steps < 10:
    logging_frequency = 1
  else:
    logging_frequency = math.floor(total_steps / 10.)

  # Run until the requested number of steps is reached; when no step count is
  # set, run until the input raises OutOfRangeError.
  steps_completed = 0
  while total_steps is None or steps_completed != total_steps:
    steps_completed += 1
    if (steps_completed % logging_frequency == 0 or
        total_steps == steps_completed):
      logging.info("Report materialization [%d/%s]", steps_completed,
                   total_steps or "??")
    try:
      sess.run(metric_update_ops)
    except tf.errors.OutOfRangeError:
      logging.info("Encountered end of input during report materialization")
      break

  materialized = sess.run(fetches)
  logging.info("Materialized subnetwork_reports.")

  # Convert scalar ndarrays into python primitives and package the results.
  materialized_reports = []
  for name, values in materialized.items():
    attributes = {
        key: _to_python(value) for key, value in values["attributes"].items()
    }
    metrics = {
        key: _to_python(value) for key, value in values["metrics"].items()
    }
    materialized_reports.append(
        subnetwork.MaterializedReport(
            iteration_number=iteration_number,
            name=name,
            hparams=subnetwork_reports[name].hparams,
            attributes=attributes,
            metrics=metrics,
            included_in_final_ensemble=(name in included_subnetwork_names)))
  return materialized_reports
class ReportMaterializerTest(parameterized.TestCase, tf.test.TestCase):
  """Parameterized tests for `ReportMaterializer.materialize_subnetwork_reports`."""

  def _assert_value_matches(self, want, got):
    """Compares floats with `assertAllClose` and other values for equality."""
    if isinstance(want, float):
      self.assertAllClose(want, got)
    else:
      self.assertEqual(want, got)

  # pylint: disable=g-long-lambda
  @parameterized.named_parameters(
      dict(
          testcase_name="one_empty_subnetwork",
          input_fn=tu.dummy_input_fn([[1., 2]], [[3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo": subnetwork.Report(hparams={}, attributes={}, metrics={}),
          },
          steps=3,
          included_subnetwork_names=["foo"],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      ),
      dict(
          testcase_name="one_subnetwork",
          input_fn=tu.dummy_input_fn([[1., 2]], [[3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo": subnetwork.Report(
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": tf.constant(3.14),
                      "foo": tf.constant("bar"),
                      "parameters": tf.constant(7777),
                      "boo": tf.constant(True),
                  },
                  metrics={},
              ),
          },
          steps=3,
          included_subnetwork_names=["foo"],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": 3.14,
                      "foo": "bar",
                      "parameters": 7777,
                      "boo": True,
                  },
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      ),
      dict(
          testcase_name="one_subnetwork_iteration_2",
          input_fn=tu.dummy_input_fn([[1., 2]], [[3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo": subnetwork.Report(
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": tf.constant(3.14),
                      "foo": tf.constant("bar"),
                      "parameters": tf.constant(7777),
                      "boo": tf.constant(True),
                  },
                  metrics={},
              ),
          },
          steps=3,
          iteration_number=2,
          included_subnetwork_names=["foo"],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=2,
                  name="foo",
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": 3.14,
                      "foo": "bar",
                      "parameters": 7777,
                      "boo": True,
                  },
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      ),
      dict(
          testcase_name="two_subnetworks",
          input_fn=tu.dummy_input_fn([[1., 2]], [[3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo1": subnetwork.Report(
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": tf.constant(3.14),
                      "foo": tf.constant("bar"),
                      "parameters": tf.constant(7777),
                      "boo": tf.constant(True),
                  },
                  metrics={},
              ),
              "foo2": subnetwork.Report(
                  hparams={
                      "learning_rate": 1.e-6,
                      "optimizer": "sgd",
                      "num_layers": 1,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": tf.constant(3.1445),
                      "foo": tf.constant("baz"),
                      "parameters": tf.constant(7788),
                      "boo": tf.constant(True),
                  },
                  metrics={},
              ),
          },
          steps=3,
          included_subnetwork_names=["foo2"],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo1",
                  hparams={
                      "learning_rate": 1.e-5,
                      "optimizer": "sgd",
                      "num_layers": 0,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": 3.14,
                      "foo": "bar",
                      "parameters": 7777,
                      "boo": True,
                  },
                  metrics={},
                  included_in_final_ensemble=False,
              ),
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo2",
                  hparams={
                      "learning_rate": 1.e-6,
                      "optimizer": "sgd",
                      "num_layers": 1,
                      "use_side_inputs": True,
                  },
                  attributes={
                      "weight_norms": 3.1445,
                      "foo": "baz",
                      "parameters": 7788,
                      "boo": True,
                  },
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      ),
      dict(
          testcase_name="two_subnetworks_zero_included",
          input_fn=tu.dummy_input_fn([[1., 2]], [[3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo1": subnetwork.Report(
                  hparams={},
                  attributes={},
                  metrics={},
              ),
              "foo2": subnetwork.Report(
                  hparams={},
                  attributes={},
                  metrics={},
              ),
          },
          steps=3,
          included_subnetwork_names=[],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo1",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=False,
              ),
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo2",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=False,
              ),
          ],
      ),
      dict(
          testcase_name="two_subnetworks_both_included",
          input_fn=tu.dummy_input_fn([[1., 2]], [[3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo1": subnetwork.Report(
                  hparams={},
                  attributes={},
                  metrics={},
              ),
              "foo2": subnetwork.Report(
                  hparams={},
                  attributes={},
                  metrics={},
              ),
          },
          steps=3,
          included_subnetwork_names=["foo1", "foo2"],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo1",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=True,
              ),
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo2",
                  hparams={},
                  attributes={},
                  metrics={},
                  included_in_final_ensemble=True,
              ),
          ],
      ),
      dict(
          testcase_name="materialize_metrics",
          input_fn=tu.dummy_input_fn([[1., 1.], [1., 1.], [1., 1.]],
                                     [[1.], [2.], [3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo": subnetwork.Report(
                  hparams={},
                  attributes={},
                  metrics={"moo": tf_compat.v1.metrics.mean(labels)},
              ),
          },
          steps=3,
          included_subnetwork_names=["foo"],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={},
                  attributes={},
                  metrics={"moo": 2.},
                  included_in_final_ensemble=True,
              ),
          ],
      ),
      dict(
          testcase_name="materialize_metrics_none_steps",
          input_fn=tu.dataset_input_fn([[1., 1.], [1., 1.], [1., 1.]],
                                       [[1.], [2.], [3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo": subnetwork.Report(
                  hparams={},
                  attributes={},
                  metrics={"moo": tf_compat.v1.metrics.mean(labels)},
              ),
          },
          steps=None,
          included_subnetwork_names=["foo"],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={},
                  attributes={},
                  metrics={"moo": 2.},
                  included_in_final_ensemble=True,
              ),
          ],
      ),
      dict(
          testcase_name="materialize_metrics_non_tensor_op",
          input_fn=tu.dummy_input_fn([[1., 2]], [[3.]]),
          subnetwork_reports_fn=lambda features, labels: {
              "foo": subnetwork.Report(
                  hparams={},
                  attributes={},
                  metrics={"moo": (tf.constant(42), tf.no_op())},
              ),
          },
          steps=3,
          included_subnetwork_names=["foo"],
          want_materialized_reports=[
              subnetwork.MaterializedReport(
                  iteration_number=0,
                  name="foo",
                  hparams={},
                  attributes={},
                  metrics={"moo": 42},
                  included_in_final_ensemble=True,
              ),
          ],
      ))
  @test_util.run_in_graph_and_eager_modes
  def test_materialize_subnetwork_reports(self,
                                          input_fn,
                                          subnetwork_reports_fn,
                                          steps,
                                          iteration_number=0,
                                          included_subnetwork_names=None,
                                          want_materialized_reports=None):
    """Materializes the reports in a graph-mode session and checks each field.

    Args:
      input_fn: callable returning a `(features, labels)` pair.
      subnetwork_reports_fn: callable taking `(features, labels)` and
        returning a dict of name to `subnetwork.Report`.
      steps: value passed to `ReportMaterializer` as `steps`.
      iteration_number: iteration number passed to the materializer and
        expected on every materialized report.
      included_subnetwork_names: names passed through to the materializer.
      want_materialized_reports: expected `subnetwork.MaterializedReport`s,
        matched to the actual ones by name.
    """
    with context.graph_mode():
      tf.constant(0.)  # dummy op so that the session graph is never empty.
      features, labels = input_fn()
      subnetwork_reports = subnetwork_reports_fn(features, labels)
      with self.test_session() as sess:
        sess.run(tf_compat.v1.initializers.local_variables())
        report_materializer = ReportMaterializer(input_fn=input_fn, steps=steps)
        materialized_reports = (
            report_materializer.materialize_subnetwork_reports(
                sess, iteration_number, subnetwork_reports,
                included_subnetwork_names))
        self.assertEqual(
            len(want_materialized_reports), len(materialized_reports))

        # Index the actual reports by name so that order does not matter.
        reports_by_name = {report.name: report for report in materialized_reports}
        for want_report in want_materialized_reports:
          got_report = reports_by_name[want_report.name]
          self.assertEqual(iteration_number, got_report.iteration_number)

          # Hparams are compared as stored, without decoding.
          self.assertEqual(
              set(want_report.hparams.keys()), set(got_report.hparams.keys()))
          for hparam_key, want_hparam in want_report.hparams.items():
            self._assert_value_matches(want_hparam,
                                       got_report.hparams[hparam_key])

          # Attributes and metrics are passed through `decode` before being
          # compared.
          self.assertSetEqual(
              set(want_report.attributes.keys()),
              set(got_report.attributes.keys()))
          for attribute_key, want_attribute in want_report.attributes.items():
            self._assert_value_matches(
                want_attribute, decode(got_report.attributes[attribute_key]))

          self.assertSetEqual(
              set(want_report.metrics.keys()), set(got_report.metrics.keys()))
          for metric_key, want_metric in want_report.metrics.items():
            self._assert_value_matches(
                want_metric, decode(got_report.metrics[metric_key]))
def test_add_to_existing_file(self):
  """Writes three iterations of two reports each and reads them all back.

  Every report carries int, native string, bytes, unicode, and bool values in
  its hparams, attributes, and metrics. Only the int value, the name, the
  iteration number, and the ensemble flag vary between reports.
  """

  def _make_report(iteration_number, name, number, included):
    """Builds one report whose int-valued entries are all `number`."""
    return subnetwork.MaterializedReport(
        iteration_number=iteration_number,
        name=name,
        hparams={
            "p1": number,
            "p2": "default_hparam",
            "p3": b"binary_hparam",
            "p4": u"unicode_hparam",
            "p5": True,
        },
        attributes={
            "a1": number,
            "a2": "default_attribute",
            "a3": b"binary_attribute",
            "a4": u"unicode_attribute",
            "a5": True,
        },
        metrics={
            "m1": number,
            "m2": "default_metric",
            "m3": b"binary_metric",
            "m4": u"unicode_metric",
            "m5": True,
        },
        included_in_final_ensemble=included,
    )

  # One inner list per iteration; the list index is the iteration number.
  materialized_reports = [
      [
          _make_report(0, "foo1", 11, False),
          _make_report(0, "foo2", 12, True),
      ],
      [
          _make_report(1, "foo1", 21, True),
          _make_report(1, "foo2", 22, False),
      ],
      [
          _make_report(2, "foo1", 31, False),
          _make_report(2, "foo2", 32, True),
      ],
  ]

  report_accessor = _ReportAccessor(self.get_temp_dir())
  for iteration_number, iteration_reports in enumerate(materialized_reports):
    report_accessor.write_iteration_report(iteration_number, iteration_reports)

  actual_reports = list(report_accessor.read_iteration_reports())
  self.assertEqual(materialized_reports, actual_reports)