def test_filter_by_child(self):
  """Filters "doc" by its "keep_me" child and checks the "new_doc" subtree.

  Uses the big test prensor as input and verifies parent indices and values
  of new_doc, new_doc.keep_me and new_doc.bar after calculation.
  """
  source_expr = create_expression.create_expression_from_prensor(
      prensor_test_util.create_big_prensor())
  filtered_expr = filter_expression.filter_by_child(
      source_expr, path.create_path("doc"), "keep_me", "new_doc")
  (result,) = calculate.calculate_prensors([filtered_expr])

  # Each node is looked up right before it is checked so that failures are
  # reported in the same order as lookups happen.
  new_doc_node = result.get_descendant_or_error(path.Path(["new_doc"])).node
  self.assertAllEqual(new_doc_node.parent_index, [1])

  keep_me_path = path.Path(["new_doc", "keep_me"])
  keep_me_node = result.get_descendant_or_error(keep_me_path).node
  self.assertAllEqual(keep_me_node.parent_index, [0])
  self.assertAllEqual(keep_me_node.values, [True])

  bar_path = path.Path(["new_doc", "bar"])
  bar_node = result.get_descendant_or_error(bar_path).node
  self.assertAllEqual(bar_node.parent_index, [0, 0])
  self.assertAllEqual(bar_node.values, [b"b", b"c"])
def test_slice_end(self):
  """Slices "doc" with begin=None, end=1 into "new_doc" and checks the result.

  The calculated prensor is materialized in a CPU session before the parent
  indices and values of new_doc, new_doc.keep_me and new_doc.bar are checked.
  """
  with self.session(use_gpu=False) as sess:
    source_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())
    sliced_expr = slice_expression.slice_expression(
        source_expr, path.Path(["doc"]), "new_doc", None, 1)
    calculated = calculate.calculate_prensors([sliced_expr])[0]
    result = prensor_value.materialize(calculated, sess)

    new_doc_node = result.get_descendant_or_error(path.Path(["new_doc"])).node
    self.assertAllEqual(new_doc_node.parent_index, [0, 1])

    keep_me_path = path.Path(["new_doc", "keep_me"])
    keep_me_node = result.get_descendant_or_error(keep_me_path).node
    self.assertAllEqual(keep_me_node.parent_index, [0, 1])
    self.assertAllEqual(keep_me_node.values, [False, True])

    bar_path = path.Path(["new_doc", "bar"])
    bar_node = result.get_descendant_or_error(bar_path).node
    self.assertAllEqual(bar_node.parent_index, [0, 1, 1])
    self.assertAllEqual(bar_node.values, [b"a", b"b", b"c"])
def test_filter_by_child_create_nested_prensor_2(self):
  """Tests filter_by_child on the second nested test prensor.

  In particular, it checks the case where parent_index != self index.
  """
  source_expr = create_expression.create_expression_from_prensor(
      _create_nested_prensor_2())
  filtered_expr = filter_expression.filter_by_child(
      source_expr, path.create_path("doc"), "keep_me", "new_doc")
  (result,) = calculate.calculate_prensors([filtered_expr])

  new_doc_node = result.get_descendant_or_error(path.Path(["new_doc"])).node
  self.assertAllEqual(new_doc_node.parent_index, [1])

  keep_me_path = path.Path(["new_doc", "keep_me"])
  keep_me_node = result.get_descendant_or_error(keep_me_path).node
  self.assertAllEqual(keep_me_node.parent_index, [0])
  self.assertAllEqual(keep_me_node.values, [True])

  bar_path = path.Path(["new_doc", "bar"])
  bar_node = result.get_descendant_or_error(bar_path).node
  self.assertAllEqual(bar_node.parent_index, [0, 0])
  self.assertAllEqual(bar_node.values, [b"b", b"c"])
def test_e2e_proto(self):
  """Integration test for parsing protobufs.

  Builds one serialized Session proto, turns it into an expression,
  calculates the prensor, converts it to a structured tensor and prints both.
  The test makes no assertions; it only checks that the pipeline runs.
  """
  session_proto = text_format.Merge(
      """ session_info { session_duration_sec: 1.0 session_feature: "foo" } event { query: "Hello" action { number_of_views: 1 } action { } } event { query: "world" action { number_of_views: 2 } action { number_of_views: 3 } } """,
      test_pb2.Session())
  serialized = tf.constant([session_proto.SerializeToString()])

  session_expr = proto.create_expression_from_proto(
      serialized, test_pb2.Session().DESCRIPTOR)
  (p,) = calculate.calculate_prensors([session_expr])
  print(p)

  st = prensor_to_structured_tensor.prensor_to_structured_tensor(p)
  print(st)
def test_filter_by_child_create_nested_prensor(self):
  """Tests filter_by_child on the nested test prensor inside a session.

  The calculated prensor is materialized in a CPU session before the
  new_doc subtree is checked.
  """
  with self.session(use_gpu=False) as sess:
    source_expr = create_expression.create_expression_from_prensor(
        _create_nested_prensor())
    filtered_expr = filter_expression.filter_by_child(
        source_expr, path.create_path("doc"), "keep_me", "new_doc")
    calculated = calculate.calculate_prensors([filtered_expr])[0]
    result = prensor_value.materialize(calculated, sess)

    new_doc_node = result.get_descendant_or_error(path.Path(["new_doc"])).node
    self.assertAllEqual(new_doc_node.parent_index, [1])

    keep_me_path = path.Path(["new_doc", "keep_me"])
    keep_me_node = result.get_descendant_or_error(keep_me_path).node
    self.assertAllEqual(keep_me_node.parent_index, [0])
    self.assertAllEqual(keep_me_node.values, [True])

    bar_path = path.Path(["new_doc", "bar"])
    bar_node = result.get_descendant_or_error(bar_path).node
    self.assertAllEqual(bar_node.parent_index, [0, 0])
    self.assertAllEqual(bar_node.values, [b"b", b"c"])
def test_calculate_tree_root_direct(self):
  """Calculates the value of a tree with no sources.

  Runs once per entry in options_to_test and checks that the root node of
  the simple test prensor has size 3.
  """
  for calc_options in options_to_test:
    simple_prensor = prensor_test_util.create_simple_prensor()
    tree = create_expression.create_expression_from_prensor(simple_prensor)
    (calculated,) = calculate.calculate_prensors([tree], options=calc_options)
    root_size = calculated.node.size
    self.assertAllEqual(root_size, 3)
def calculate_list_map(expr: expression.Expression, evaluator):
  """Calculate a map from paths to nested lists, representing the leafs.

  Args:
    expr: the expression to calculate.
    evaluator: an object whose `evaluate` method is called with a dict of
      ragged tensors keyed by path string; each value it returns must
      support `to_list()`.

  Returns:
    A dict mapping `str(path)` to the `to_list()` result for that path.
  """
  (leaf_prensor,) = calculate.calculate_prensors([expr])
  ragged_by_path = prensor_util.get_ragged_tensors(
      leaf_prensor, calculate_options.get_default_options())

  # Paths are stringified so the dict can be handed to the evaluator.
  ragged_by_name = {}
  for leaf_path, ragged in ragged_by_path.items():
    ragged_by_name[str(leaf_path)] = ragged

  evaluated = evaluator.evaluate(ragged_by_name)

  nested_lists = {}
  for name, value in evaluated.items():
    nested_lists[name] = value.to_list()
  return nested_lists
def test_calculate_tree_root_direct(self):
  """Calculates the value of a tree with no sources.

  For each entry in options_to_test, opens a fresh CPU session, calculates
  the simple test prensor and checks that its root size evaluates to 3.
  """
  for calc_options in options_to_test:
    with self.session(use_gpu=False) as sess:
      simple_prensor = prensor_test_util.create_simple_prensor()
      tree = create_expression.create_expression_from_prensor(simple_prensor)
      (calculated,) = calculate.calculate_prensors(
          [tree], options=calc_options)
      self.assertAllEqual(sess.run(calculated.node.size), 3)
def calculate_list_map(expr: expression.Expression,
                       evaluator,
                       options: Optional[calculate_options.Options] = None):
  """Calculate a map from paths to nested lists, representing the leafs.

  Args:
    expr: the expression to calculate.
    evaluator: an object whose `evaluate` method is called with a dict of
      ragged tensors keyed by path string; each value it returns must
      support `to_list()`.
    options: calculation options. Passed as given to `calculate_prensors`;
      a falsy value is replaced by the default options before the ragged
      tensors are extracted.

  Returns:
    A dict mapping `str(path)` to the `to_list()` result for that path.
  """
  # calculate_prensors receives the caller's value (possibly None); the
  # default is only substituted afterwards, for get_ragged_tensors.
  (leaf_prensor,) = calculate.calculate_prensors([expr], options=options)
  if not options:
    options = calculate_options.get_default_options()
  ragged_by_path = leaf_prensor.get_ragged_tensors(options)

  ragged_by_name = {}
  for leaf_path, ragged in ragged_by_path.items():
    ragged_by_name[str(leaf_path)] = ragged

  evaluated = evaluator.evaluate(ragged_by_name)

  nested_lists = {}
  for name, value in evaluated.items():
    nested_lists[name] = value.to_list()
  return nested_lists
def test_slice_mask_begin_negative(self):
  """Checks the slice mask built for "doc" with begin=-1 and no end."""
  source_expr = create_expression.create_expression_from_prensor(
      prensor_test_util.create_big_prensor())
  masked_expr, mask_path = slice_expression._get_slice_mask(
      source_expr, path.Path(["doc"]), -1, None)
  result = calculate.calculate_prensors([masked_expr])[0]

  mask_node = result.get_descendant_or_error(mask_path).node
  self.assertAllEqual(mask_node.parent_index, [0, 1, 1])
  self.assertAllEqual(mask_node.values, [True, False, True])
def test_filter_by_sibling(self):
  """Tests filter_by_sibling.

  Input struct (layout reconstructed from the original flattened diagram;
  confirm against _create_nested_prensor):
    root0: doc0 {bar: "a", keep_me: False}, keep_my_sib0: False
    root1: doc1 {bar: "b", bar: "c", keep_me: True}, keep_my_sib1: True
           doc2 {bar: "d"}, keep_my_sib2: False
    root2: (empty)

  Filtering doc with keep_my_sib ends with (suppressing original doc):
    root0: keep_my_sib0: False
    root1: new_doc0 {bar: "b", bar: "c", keep_me: True},
           keep_my_sib1: True, keep_my_sib2: False
    root2: (empty)
  """
  source_expr = create_expression.create_expression_from_prensor(
      _create_nested_prensor())
  filtered_expr = filter_expression.filter_by_sibling(
      source_expr, path.create_path("doc"), "keep_my_sib", "new_doc")
  (result,) = calculate.calculate_prensors([filtered_expr])

  new_doc_node = result.get_descendant_or_error(path.Path(["new_doc"])).node
  self.assertAllEqual(new_doc_node.parent_index, [1])

  keep_me_path = path.Path(["new_doc", "keep_me"])
  keep_me_node = result.get_descendant_or_error(keep_me_path).node
  self.assertAllEqual(keep_me_node.parent_index, [0])
  self.assertAllEqual(keep_me_node.values, [True])

  bar_path = path.Path(["new_doc", "bar"])
  bar_node = result.get_descendant_or_error(bar_path).node
  self.assertAllEqual(bar_node.parent_index, [0, 0])
  self.assertAllEqual(bar_node.values, [b"b", b"c"])
def test_slice_mask_begin_positive(self):
  """Checks the slice mask built for "doc" with begin=1 and no end.

  The calculated prensor is materialized in a CPU session before checking.
  """
  with self.session(use_gpu=False) as sess:
    source_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())
    masked_expr, mask_path = slice_expression._get_slice_mask(
        source_expr, path.Path(["doc"]), 1, None)
    calculated = calculate.calculate_prensors([masked_expr])[0]
    result = prensor_value.materialize(calculated, sess)

    mask_node = result.get_descendant_or_error(mask_path).node
    self.assertAllEqual(mask_node.parent_index, [0, 1, 1])
    self.assertAllEqual(mask_node.values, [False, False, True])
def test_create_proto_index_directly_reroot_at_action_sparse_dense(self):
  """Checks the proto index created after rerooting at "event.action".

  Three text-format sessions (the second one empty) are parsed, rerooted at
  event.action, and a proto index field is created. The field is read back
  both as a sparse tensor and as its dense conversion.
  """
  index_field = "proto_index_directly_reroot_at_action"
  sessions = [
      """ event { action {} action {} } event {} event { action {} } """,
      "",
      """ event {} event { action {} action {} } event { } """,
  ]
  with self.session(use_gpu=False) as sess:
    session_expr = proto_test_util.text_to_expression(sessions,
                                                      test_pb2.Session)
    # Reroot with a depth > 1 (all the other cases are depth == 1)
    action_expr = session_expr.reroot("event.action")
    indexed_expr = action_expr.create_proto_index(index_field)
    (prensor_tree,) = calculate.calculate_prensors([indexed_expr])

    index_node = prensor_tree.get_child_or_error(index_field).node
    self.assertFalse(index_node.is_repeated)

    sparse_by_path = prensor_util.get_sparse_tensors(
        prensor_tree, calculate_options.get_default_options())
    index_sparse = sparse_by_path[path.Path([index_field])]
    fetches = [index_sparse, tf.sparse_tensor_to_dense(index_sparse)]
    sparse_value, dense_value = sess.run(fetches)

    self.assertAllEqual(sparse_value.values, [0, 0, 0, 2, 2])
    self.assertAllEqual(sparse_value.indices, [[0], [1], [2], [3], [4]])
    self.assertAllEqual(sparse_value.dense_shape, [5])
    self.assertAllEqual(dense_value, [0, 0, 0, 2, 2])
def test_slice_begin(self):
  """Test slice with only begin specified.

  Starts with:
    {foo:9, foorepeated:[9],
     doc:[{bar:["a"], keep_me:False}],
     user:[{friends:["a"]}]}
    {foo:8, foorepeated:[8,7],
     doc:[{bar:["b","c"], keep_me:True}, {bar:["d"]}],
     user:[{friends:["b","c"]}, {friends:["d"]}]}
    {foo:7, foorepeated:[6],
     user:[{friends:["e"]}]}

  Creates new_doc by slicing doc[1:]:
    {foo:9, foorepeated:[9],
     doc:[{bar:["a"], keep_me:False}],
     user:[{friends:["a"]}]}
    {foo:8, foorepeated:[8,7],
     doc:[{bar:["b","c"], keep_me:True}, {bar:["d"]}],
     new_doc:[{bar:["d"]}],
     user:[{friends:["b","c"]}, {friends:["d"]}]}
    {foo:7, foorepeated:[6],
     user:[{friends:["e"]}]}
  """
  with self.session(use_gpu=False) as sess:
    source_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())
    sliced_expr = slice_expression.slice_expression(
        source_expr, path.Path(["doc"]), "new_doc", 1, None)
    calculated = calculate.calculate_prensors([sliced_expr])[0]
    result = prensor_value.materialize(calculated, sess)

    new_doc_node = result.get_descendant_or_error(path.Path(["new_doc"])).node
    self.assertAllEqual(new_doc_node.parent_index, [1])

    # The only surviving doc has no keep_me, so both arrays are empty.
    keep_me_path = path.Path(["new_doc", "keep_me"])
    keep_me_node = result.get_descendant_or_error(keep_me_path).node
    self.assertAllEqual(keep_me_node.parent_index, [])
    self.assertAllEqual(keep_me_node.values, [])

    bar_path = path.Path(["new_doc", "bar"])
    bar_node = result.get_descendant_or_error(bar_path).node
    self.assertAllEqual(bar_node.parent_index, [0])
    self.assertAllEqual(bar_node.values, [b"d"])
def test_map_prensor_to_prensor(self):
  """Maps doc.{bar, keep_me} to new fields bar2/keep_me2 and checks both.

  First verifies the schema (repeatedness and dtype) of the original and the
  new fields on the resulting expression, then calculates the prensor and
  checks the parent indices and values of the new fields.
  """
  original = create_expression.create_expression_from_prensor(
      prensor_test_util.create_nested_prensor())

  def my_prensor_op(original_prensor):
    # Note that we are copying over the original root prensor node. The root
    # node is ignored in the result.
    copied_nodes = {
        path.Path([]): original_prensor.node,
        path.Path(["bar2"]): original_prensor.get_child_or_error("bar").node,
        path.Path(["keep_me2"]):
            original_prensor.get_child_or_error("keep_me").node,
    }
    return prensor.create_prensor_from_descendant_nodes(copied_nodes)

  # Since the top node is actually a child node, we use the child schema.
  child_schemas = {
      "bar2": {"is_repeated": True, "dtype": tf.string},
      "keep_me2": {"is_repeated": False, "dtype": tf.bool},
  }
  my_output_schema = map_prensor_to_prensor.create_schema(
      is_repeated=True, children=child_schemas)

  result = map_prensor_to_prensor.map_prensor_to_prensor(
      root_expr=original,
      source=path.Path(["doc"]),
      paths_needed=[path.Path(["bar"]), path.Path(["keep_me"])],
      prensor_op=my_prensor_op,
      output_schema=my_output_schema)

  doc_expr = result.get_child_or_error("doc")
  bar_expr = doc_expr.get_child_or_error("bar")
  keep_me_expr = doc_expr.get_child_or_error("keep_me")
  bar2_expr = doc_expr.get_child_or_error("bar2")
  keep_me2_expr = doc_expr.get_child_or_error("keep_me2")
  self.assertIsNone(doc_expr.get_child("missing_field"))

  # Schema checks: each new field must mirror the field it was copied from.
  string_fields = (bar_expr, bar2_expr)
  for field_expr in string_fields:
    self.assertTrue(field_expr.is_repeated)
  for field_expr in string_fields:
    self.assertEqual(field_expr.type, tf.string)

  bool_fields = (keep_me_expr, keep_me2_expr)
  for field_expr in bool_fields:
    self.assertFalse(field_expr.is_repeated)
  for field_expr in bool_fields:
    self.assertEqual(field_expr.type, tf.bool)

  (prensor_result,) = calculate.calculate_prensors([result])

  doc_value = prensor_result.get_child_or_error("doc")
  self.assertAllEqual([0, 1, 1], doc_value.node.parent_index)

  bar2_node = doc_value.get_child_or_error("bar2").node
  self.assertAllEqual([0, 1, 1, 2], bar2_node.parent_index)
  self.assertAllEqual([b"a", b"b", b"c", b"d"], bar2_node.values)

  keep_me2_node = doc_value.get_child_or_error("keep_me2").node
  self.assertAllEqual([0, 1], keep_me2_node.parent_index)
  self.assertAllEqual([False, True], keep_me2_node.values)
def parse_elwc_with_struct2tensor(
    records: tf.Tensor,
    context_features: List[Feature],
    example_features: List[Feature],
    size_feature_name: Optional[str] = None) -> Dict[str, tf.RaggedTensor]:
  """Parses a batch of ELWC records into RaggedTensors using struct2tensor.

  Args:
    records: The serialized ELWC input. It is handed unchanged to
      `create_expression_from_proto` together with the
      `ExampleListWithContext` descriptor.
    context_features: List of context-level features.
    example_features: List of example-level features.
    size_feature_name: A string, the name of a feature for example list sizes.
      If None, which is default, this feature is not generated. Otherwise the
      feature is added to the feature dict.

  Returns:
    A dict that maps feature name to RaggedTensors. Context features are
    inserted first, then example features (which win on a name clash), then
    the optional size feature.
  """
  examples_root = path.Path(['examples'])
  context_root = path.Path([])

  def step_name(feature_name: str):
    """Returns a valid prensor step name for a feature name.

    A prensor step cannot contain dots ("."), but a feature name can, so dots
    are replaced.
    """
    return feature_name.replace('.', '_dot_')

  def default_filled_step_name(feature_name: str):
    """Returns the step name of the default-padded variant of a feature."""
    return step_name(feature_name) + _DEFAULT_VALUE_SUFFIX

  def feature_value_path(top_step: str, feature: Feature):
    """Returns the path to a feature's values under `top_step`."""
    list_name = _TYPE_LIST_MAP.get(feature.dtype)
    feature_step = 'feature[{}]'.format(feature.name)
    return path.Path([top_step, 'features', feature_step, list_name, 'value'])

  def promote_and_project_maps(features: List[Feature], is_context: bool):
    """Builds (step name -> source path, feature name -> projected path)."""
    top_step = 'context' if is_context else 'examples'
    promote_map = {}
    project_map = {}
    for feature in features:
      promoted_step = step_name(feature.name)
      promote_map[promoted_step] = feature_value_path(top_step, feature)
      # Features with a default value are read from their padded variant.
      if feature.default_value is None:
        leaf_name = promoted_step
      else:
        leaf_name = default_filled_step_name(feature.name)
      if is_context:
        project_map[feature.name] = path.Path([leaf_name])
      else:
        project_map[feature.name] = path.Path(['examples', leaf_name])
    return promote_map, project_map

  def make_pad_fn(feature: Feature):
    """Returns an operator padding a ragged tensor to `feature.length`."""

    def pad_2d_ragged(rt):
      dense = rt.to_tensor(
          shape=[None, feature.length], default_value=feature.default_value)
      flattened = tf.reshape(dense, [-1])
      return tf.RaggedTensor.from_uniform_row_length(
          flattened, feature.length, validate=False)

    return pad_2d_ragged

  context_promote_map, context_keys_to_paths = promote_and_project_maps(
      context_features, is_context=True)
  examples_promote_map, examples_keys_to_paths = promote_and_project_maps(
      example_features, is_context=False)

  # Build the struct2tensor query.
  s2t_expr = proto_expr.create_expression_from_proto(
      records, input_pb2.ExampleListWithContext.DESCRIPTOR)
  s2t_expr = s2t_expr.promote_and_broadcast(context_promote_map,
                                            path.Path([]))
  s2t_expr = s2t_expr.promote_and_broadcast(examples_promote_map,
                                            path.Path(['examples']))

  # Pad features that have default_values specified.
  feature_groups = [(context_features, context_root),
                    (example_features, examples_root)]
  for features, parent_path in feature_groups:
    for feature in features:
      if feature.default_value is None:
        continue
      s2t_expr = s2t_expr.map_ragged_tensors(
          parent_path=parent_path,
          source_fields=[step_name(feature.name)],
          operator=make_pad_fn(feature),
          is_repeated=True,
          dtype=feature.dtype,
          new_field_name=default_filled_step_name(feature.name))

  to_project = list(
      itertools.chain(context_keys_to_paths.values(),
                      examples_keys_to_paths.values()))

  if size_feature_name is not None:
    size_step = step_name(size_feature_name)
    s2t_expr = s2t_expr.create_size_field(path.Path(['examples']), size_step)
    to_project.append(path.Path([size_step]))

  projection = s2t_expr.project(to_project)

  options = calculate_options.get_options_with_minimal_checks()
  prensor_result = calculate.calculate_prensors([projection], options)[0]
  # a map from path.Path to RaggedTensors.
  projected_with_paths = prensor_util.get_ragged_tensors(
      prensor_result, options)

  # Context entries go in first so that example entries override them on a
  # name clash.
  result = {}
  for keys_to_paths in (context_keys_to_paths, examples_keys_to_paths):
    for feature_name in keys_to_paths:
      result[feature_name] = projected_with_paths[keys_to_paths[feature_name]]

  if size_feature_name is not None:
    result[size_feature_name] = projected_with_paths[path.Path(
        [step_name(size_feature_name)])]
  return result