示例#1
0
 def test_filter_by_child(self):
     """Checks filter_by_child on the big test prensor.

     "doc" is filtered on its "keep_me" child into a new field "new_doc".
     The parent indices and values of "new_doc" and of its "keep_me" and
     "bar" children are then compared against fixed expectations.
     """
     source_expr = create_expression.create_expression_from_prensor(
         prensor_test_util.create_big_prensor())
     filtered_expr = filter_expression.filter_by_child(
         source_expr, path.create_path("doc"), "keep_me", "new_doc")
     [computed] = calculate.calculate_prensors([filtered_expr])

     # (path steps, node attribute, expected contents), checked in order.
     expectations = (
         (["new_doc"], "parent_index", [1]),
         (["new_doc", "keep_me"], "parent_index", [0]),
         (["new_doc", "keep_me"], "values", [True]),
         (["new_doc", "bar"], "parent_index", [0, 0]),
         (["new_doc", "bar"], "values", [b"b", b"c"]),
     )
     for steps, attribute, expected in expectations:
         node = computed.get_descendant_or_error(path.Path(steps)).node
         self.assertAllEqual(getattr(node, attribute), expected)
    def test_slice_end(self):
        """Checks slice_expression on "doc" with bounds (None, 1).

        The sliced field is written to "new_doc". After the calculated
        prensor is materialized in a session, the parent indices and values
        of "new_doc", "new_doc.keep_me" and "new_doc.bar" are compared
        against fixed expectations.
        """
        with self.session(use_gpu=False) as sess:
            big_expr = create_expression.create_expression_from_prensor(
                prensor_test_util.create_big_prensor())
            sliced_expr = slice_expression.slice_expression(
                big_expr, path.Path(["doc"]), "new_doc", None, 1)
            calculated = calculate.calculate_prensors([sliced_expr])
            materialized = prensor_value.materialize(calculated[0], sess)

            def node_at(*steps):
                """Returns the materialized node found at the given steps."""
                return materialized.get_descendant_or_error(
                    path.Path(list(steps))).node

            self.assertAllEqual(node_at("new_doc").parent_index, [0, 1])
            self.assertAllEqual(
                node_at("new_doc", "keep_me").parent_index, [0, 1])
            self.assertAllEqual(
                node_at("new_doc", "keep_me").values, [False, True])
            self.assertAllEqual(
                node_at("new_doc", "bar").parent_index, [0, 1, 1])
            self.assertAllEqual(
                node_at("new_doc", "bar").values, [b"a", b"b", b"c"])
示例#3
0
    def test_filter_by_child_create_nested_prensor_2(self):
        """Checks filter_by_child on the prensor from _create_nested_prensor_2.

        In particular, this covers the case where parent_index != self index.
        """
        source_expr = create_expression.create_expression_from_prensor(
            _create_nested_prensor_2())
        filtered_expr = filter_expression.filter_by_child(
            source_expr, path.create_path("doc"), "keep_me", "new_doc")
        [filtered] = calculate.calculate_prensors([filtered_expr])

        new_doc_path = path.Path(["new_doc"])
        keep_me_path = path.Path(["new_doc", "keep_me"])
        bar_path = path.Path(["new_doc", "bar"])

        def node_for(node_path):
            """Looks up the calculated node stored at node_path."""
            return filtered.get_descendant_or_error(node_path).node

        self.assertAllEqual(node_for(new_doc_path).parent_index, [1])
        self.assertAllEqual(node_for(keep_me_path).parent_index, [0])
        self.assertAllEqual(node_for(keep_me_path).values, [True])
        self.assertAllEqual(node_for(bar_path).parent_index, [0, 0])
        self.assertAllEqual(node_for(bar_path).values, [b"b", b"c"])
示例#4
0
 def test_e2e_proto(self):
     """Integration test for parsing protobufs.

     A text-format Session proto is serialized, turned into an expression,
     calculated into a prensor and converted with
     prensor_to_structured_tensor. Both the prensor and the converted value
     are printed; nothing is asserted.
     """
     session_text = """
     session_info {
       session_duration_sec: 1.0
       session_feature: "foo"
     }
     event {
       query: "Hello"
       action {
         number_of_views: 1
       }
       action {
       }
     }
     event {
       query: "world"
       action {
         number_of_views: 2
       }
       action {
         number_of_views: 3
       }
     }
     """
     session_proto = text_format.Merge(session_text, test_pb2.Session())
     serialized = tf.constant([session_proto.SerializeToString()])
     session_expr = proto.create_expression_from_proto(
         serialized, test_pb2.Session().DESCRIPTOR)
     [session_prensor] = calculate.calculate_prensors([session_expr])
     print(session_prensor)
     structured = prensor_to_structured_tensor.prensor_to_structured_tensor(
         session_prensor)
     print(structured)
 def test_filter_by_child_create_nested_prensor(self):
     """Checks filter_by_child on the prensor from _create_nested_prensor.

     "doc" is filtered on its "keep_me" child into "new_doc". The result is
     materialized in a session before its parent indices and values are
     compared against fixed expectations.
     """
     with self.session(use_gpu=False) as sess:
         nested_expr = create_expression.create_expression_from_prensor(
             _create_nested_prensor())
         filtered_expr = filter_expression.filter_by_child(
             nested_expr, path.create_path("doc"), "keep_me", "new_doc")
         calculated = calculate.calculate_prensors([filtered_expr])
         materialized = prensor_value.materialize(calculated[0], sess)

         def parent_index_at(*steps):
             """Returns the parent_index of the node at the given steps."""
             return materialized.get_descendant_or_error(
                 path.Path(list(steps))).node.parent_index

         def values_at(*steps):
             """Returns the values of the node at the given steps."""
             return materialized.get_descendant_or_error(
                 path.Path(list(steps))).node.values

         self.assertAllEqual(parent_index_at("new_doc"), [1])
         self.assertAllEqual(parent_index_at("new_doc", "keep_me"), [0])
         self.assertAllEqual(values_at("new_doc", "keep_me"), [True])
         self.assertAllEqual(parent_index_at("new_doc", "bar"), [0, 0])
         self.assertAllEqual(values_at("new_doc", "bar"), [b"b", b"c"])
示例#6
0
 def test_calculate_tree_root_direct(self):
   """Calculates the value of a tree with no sources.

   For every entry of options_to_test, the simple test prensor is wrapped in
   an expression, calculated, and its root node size is expected to be 3.
   """
   for options in options_to_test:
     simple_expr = create_expression.create_expression_from_prensor(
         prensor_test_util.create_simple_prensor())
     calculated = calculate.calculate_prensors([simple_expr], options=options)
     # Exactly one prensor is expected back for the single input expression.
     (root_prensor,) = calculated
     self.assertAllEqual(root_prensor.node.size, 3)
示例#7
0
def calculate_list_map(expr: expression.Expression, evaluator):
    """Calculates a map from path strings to nested lists of leaf values.

    Args:
      expr: The expression to calculate.
      evaluator: Object whose `evaluate` method takes the dict of ragged
        tensors and returns a dict of values that support `to_list()`.

    Returns:
      A dict keyed by `str(path)` whose values are the `to_list()` results.
    """
    (computed,) = calculate.calculate_prensors([expr])
    ragged_by_path = prensor_util.get_ragged_tensors(
        computed, calculate_options.get_default_options())

    # Paths are stringified so the evaluator receives plain string keys.
    ragged_by_name = {}
    for tensor_path, ragged in ragged_by_path.items():
        ragged_by_name[str(tensor_path)] = ragged

    evaluated = evaluator.evaluate(ragged_by_name)
    lists_by_name = {}
    for name, value in evaluated.items():
        lists_by_name[name] = value.to_list()
    return lists_by_name
示例#8
0
 def test_calculate_tree_root_direct(self):
     """Calculates the value of a tree with no sources.

     For every entry of options_to_test, the simple test prensor is wrapped
     in an expression and calculated. The root node size is then evaluated
     in a session and expected to be 3.
     """
     for options in options_to_test:
         with self.session(use_gpu=False) as sess:
             simple_expr = create_expression.create_expression_from_prensor(
                 prensor_test_util.create_simple_prensor())
             (root_prensor,) = calculate.calculate_prensors(
                 [simple_expr], options=options)
             self.assertAllEqual(sess.run(root_prensor.node.size), 3)
示例#9
0
def calculate_list_map(expr: expression.Expression,
                       evaluator,
                       options: Optional[calculate_options.Options] = None):
    """Calculates a map from path strings to nested lists of leaf values.

    Args:
      expr: The expression to calculate.
      evaluator: Object whose `evaluate` method takes the dict of ragged
        tensors and returns a dict of values that support `to_list()`.
      options: Calculation options. The value is forwarded as given to
        `calculate_prensors`; when it is falsy, the default options are used
        for extracting the ragged tensors.

    Returns:
      A dict keyed by `str(path)` whose values are the `to_list()` results.
    """
    (computed,) = calculate.calculate_prensors([expr], options=options)
    # The fallback is resolved only after the calculation above has run.
    ragged_options = options or calculate_options.get_default_options()
    ragged_by_path = computed.get_ragged_tensors(ragged_options)

    ragged_by_name = {}
    for tensor_path, ragged in ragged_by_path.items():
        ragged_by_name[str(tensor_path)] = ragged

    evaluated = evaluator.evaluate(ragged_by_name)
    lists_by_name = {}
    for name, value in evaluated.items():
        lists_by_name[name] = value.to_list()
    return lists_by_name
示例#10
0
 def test_slice_mask_begin_negative(self):
     """Checks the mask built by _get_slice_mask for bounds (-1, None).

     The mask is computed over "doc" of the big test prensor; its parent
     indices and boolean values are compared against fixed expectations.
     """
     big_expr = create_expression.create_expression_from_prensor(
         prensor_test_util.create_big_prensor())
     masked_expr, mask_path = slice_expression._get_slice_mask(
         big_expr, path.Path(["doc"]), -1, None)
     calculated = calculate.calculate_prensors([masked_expr])
     mask_node = calculated[0].get_descendant_or_error(mask_path).node
     self.assertAllEqual(mask_node.parent_index, [0, 1, 1])
     self.assertAllEqual(mask_node.values, [True, False, True])
示例#11
0
    def test_filter_by_sibling(self):
        r"""Tests filter_by_sibling.

        Beginning with the struct:
             -----*----------------------------------------------------
            /                       \                                  \
         root0                    root1-----------------------      root2 (empty)
          /   \                   /    \               \      \
          |  keep_my_sib0:False  |  keep_my_sib1:True   | keep_my_sib2:False
        doc0-----               doc1---------------    doc2--------
         |       \                \           \    \               \
        bar:"a"  keep_me:False    bar:"b" bar:"c" keep_me:True      bar:"d"

        Filter doc with keep_my_sib:

        End with the struct (suppressing original doc):
             -----*----------------------------------------------------
            /                       \                                  \
        root0                    root1------------------        root2 (empty)
            \                   /    \                  \
            keep_my_sib0:False  |  keep_my_sib1:True   keep_my_sib2:False
                               new_doc0-----------
                                 \           \    \
                                 bar:"b" bar:"c" keep_me:True

        """
        nested_expr = create_expression.create_expression_from_prensor(
            _create_nested_prensor())
        filtered_expr = filter_expression.filter_by_sibling(
            nested_expr, path.create_path("doc"), "keep_my_sib", "new_doc")
        [filtered] = calculate.calculate_prensors([filtered_expr])

        # (path steps, node attribute, expected contents), checked in order.
        expectations = (
            (["new_doc"], "parent_index", [1]),
            (["new_doc", "keep_me"], "parent_index", [0]),
            (["new_doc", "keep_me"], "values", [True]),
            (["new_doc", "bar"], "parent_index", [0, 0]),
            (["new_doc", "bar"], "values", [b"b", b"c"]),
        )
        for steps, attribute, expected in expectations:
            node = filtered.get_descendant_or_error(path.Path(steps)).node
            self.assertAllEqual(getattr(node, attribute), expected)
 def test_slice_mask_begin_positive(self):
     """Checks the mask built by _get_slice_mask for bounds (1, None).

     The mask is computed over "doc" of the big test prensor and
     materialized in a session; its parent indices and boolean values are
     compared against fixed expectations.
     """
     with self.session(use_gpu=False) as sess:
         big_expr = create_expression.create_expression_from_prensor(
             prensor_test_util.create_big_prensor())
         masked_expr, mask_path = slice_expression._get_slice_mask(
             big_expr, path.Path(["doc"]), 1, None)
         calculated = calculate.calculate_prensors([masked_expr])
         materialized = prensor_value.materialize(calculated[0], sess)
         mask_node = materialized.get_descendant_or_error(mask_path).node
         self.assertAllEqual(mask_node.parent_index, [0, 1, 1])
         self.assertAllEqual(mask_node.values, [False, False, True])
示例#13
0
 def test_create_proto_index_directly_reroot_at_action_sparse_dense(self):
   """Checks the proto index of an expression rerooted at "event.action".

   Three session texts are parsed (the middle one is empty), the expression
   is rerooted at "event.action" and a proto index field is created on it.
   The test asserts that the field is not repeated and that both its sparse
   tensor and the densified tensor hold [0, 0, 0, 2, 2].
   """
   index_field = "proto_index_directly_reroot_at_action"
   session_texts = [
       """
       event {
         action {}
         action {}
       }
       event {}
       event { action {} }
       """, "", """
       event {}
       event {
         action {}
         action {}
       }
       event {  }
       """
   ]
   with self.session(use_gpu=False) as sess:
     session_expr = proto_test_util.text_to_expression(
         session_texts, test_pb2.Session)
     # Reroot with a depth > 1 (all the other cases are depth == 1)
     action_expr = session_expr.reroot("event.action")
     indexed_expr = action_expr.create_proto_index(index_field)
     [index_prensor] = calculate.calculate_prensors([indexed_expr])

     index_node = index_prensor.get_child_or_error(index_field).node
     self.assertFalse(index_node.is_repeated)

     sparse_by_path = prensor_util.get_sparse_tensors(
         index_prensor, calculate_options.get_default_options())
     index_sparse = sparse_by_path[path.Path([index_field])]
     index_dense = tf.sparse_tensor_to_dense(index_sparse)
     [sparse_value, dense_value] = sess.run([index_sparse, index_dense])

     self.assertAllEqual(sparse_value.values, [0, 0, 0, 2, 2])
     self.assertAllEqual(sparse_value.indices, [[0], [1], [2], [3], [4]])
     self.assertAllEqual(sparse_value.dense_shape, [5])
     self.assertAllEqual(dense_value, [0, 0, 0, 2, 2])
    def test_slice_begin(self):
        """Test slice with only begin specified.

        Starts with:
        {
          foo:9,
          foorepeated:[9],
          doc:[{
             bar:["a"],
             keep_me:False
            }],
          user:[
            {
              friends:["a"]
            }]
        }
        {foo:8,
         foorepeated:[8,7],
         doc:[{
           bar:["b","c"],
           keep_me:True
         },{
           bar:["d"]
         }],
         user:[{
           friends:["b", "c"]
         },{
           friends:["d"]
         }],
         }
         {foo:7,
          foorepeated:[6],
          user:[{friends:["e"]}]}

        Creates new_doc by slicing doc[1:]:
        {foo:9,
         foorepeated:[9],
         doc:[{
           bar:["a"],
           keep_me:False
         }],
         user:[{
           friends:["a"]
         }]}
        {foo:8,
         foorepeated:[8,7],
         doc:[{
           bar:["b","c"],
           keep_me:True
         },{
           bar:["d"]
         }],
         new_doc:[{
           bar:["d"]
         }],
         user:[{
           friends:["b", "c"]
         },{
           friends:["d"]}],}
        {foo:7,
         foorepeated:[6],
         user:[{
           friends:["e"]
         }]}
        """
        with self.session(use_gpu=False) as sess:
            big_expr = create_expression.create_expression_from_prensor(
                prensor_test_util.create_big_prensor())
            sliced_expr = slice_expression.slice_expression(
                big_expr, path.Path(["doc"]), "new_doc", 1, None)
            calculated = calculate.calculate_prensors([sliced_expr])
            materialized = prensor_value.materialize(calculated[0], sess)

            # (path steps, node attribute, expected contents), in order.
            expectations = (
                (["new_doc"], "parent_index", [1]),
                (["new_doc", "keep_me"], "parent_index", []),
                (["new_doc", "keep_me"], "values", []),
                (["new_doc", "bar"], "parent_index", [0]),
                (["new_doc", "bar"], "values", [b"d"]),
            )
            for steps, attribute, expected in expectations:
                node = materialized.get_descendant_or_error(
                    path.Path(steps)).node
                self.assertAllEqual(getattr(node, attribute), expected)
    def test_map_prensor_to_prensor(self):
        """Checks map_prensor_to_prensor applied to the "doc" subtree.

        The mapped op re-exposes the "bar" and "keep_me" nodes as "bar2" and
        "keep_me2". The test first checks the schema (is_repeated and type)
        of the original and new fields on the resulting expression, then
        calculates it and checks parent indices and values of the new
        fields.
        """
        source_expr = create_expression.create_expression_from_prensor(
            prensor_test_util.create_nested_prensor())

        def my_prensor_op(original_prensor):
            # Note that we are copying over the original root prensor node.
            # The root node is ignored in the result.
            copied_nodes = {
                path.Path([]):
                original_prensor.node,
                path.Path(["bar2"]):
                original_prensor.get_child_or_error("bar").node,
                path.Path(["keep_me2"]):
                original_prensor.get_child_or_error("keep_me").node
            }
            return prensor.create_prensor_from_descendant_nodes(copied_nodes)

        # Since the top node is actually a child node, we use the child
        # schema.
        child_schemas = {
            "bar2": {
                "is_repeated": True,
                "dtype": tf.string
            },
            "keep_me2": {
                "is_repeated": False,
                "dtype": tf.bool
            }
        }
        output_schema = map_prensor_to_prensor.create_schema(
            is_repeated=True, children=child_schemas)

        mapped_expr = map_prensor_to_prensor.map_prensor_to_prensor(
            root_expr=source_expr,
            source=path.Path(["doc"]),
            paths_needed=[path.Path(["bar"]),
                          path.Path(["keep_me"])],
            prensor_op=my_prensor_op,
            output_schema=output_schema)

        doc_expr = mapped_expr.get_child_or_error("doc")
        bar_expr = doc_expr.get_child_or_error("bar")
        keep_me_expr = doc_expr.get_child_or_error("keep_me")
        bar2_expr = doc_expr.get_child_or_error("bar2")
        keep_me2_expr = doc_expr.get_child_or_error("keep_me2")
        self.assertIsNone(doc_expr.get_child("missing_field"))

        # Each copied field must report the same schema as its source.
        for repeated_expr in (bar_expr, bar2_expr):
            self.assertTrue(repeated_expr.is_repeated)
        for string_expr in (bar_expr, bar2_expr):
            self.assertEqual(string_expr.type, tf.string)
        for optional_expr in (keep_me_expr, keep_me2_expr):
            self.assertFalse(optional_expr.is_repeated)
        for bool_expr in (keep_me_expr, keep_me2_expr):
            self.assertEqual(bool_expr.type, tf.bool)

        [mapped_prensor] = calculate.calculate_prensors([mapped_expr])

        doc_prensor = mapped_prensor.get_child_or_error("doc")
        self.assertAllEqual([0, 1, 1], doc_prensor.node.parent_index)

        bar2_node = doc_prensor.get_child_or_error("bar2").node
        self.assertAllEqual([0, 1, 1, 2], bar2_node.parent_index)
        self.assertAllEqual([b"a", b"b", b"c", b"d"], bar2_node.values)

        keep_me2_node = doc_prensor.get_child_or_error("keep_me2").node
        self.assertAllEqual([0, 1], keep_me2_node.parent_index)
        self.assertAllEqual([False, True], keep_me2_node.values)
示例#16
0
def parse_elwc_with_struct2tensor(
        records: tf.Tensor,
        context_features: List[Feature],
        example_features: List[Feature],
        size_feature_name: Optional[str] = None) -> Dict[str, tf.RaggedTensor]:
    """Parses a batch of ELWC records into RaggedTensors using struct2tensor.

    Args:
      records: Tensor holding the serialized ExampleListWithContext (ELWC)
        input; it is handed unchanged to create_expression_from_proto.
      context_features: List of context-level features.
      example_features: List of example-level features.
      size_feature_name: A string, the name of a feature for example list
        sizes. If None, which is default, this feature is not generated.
        Otherwise the feature is added to the feature dict.

    Returns:
      A dict that maps feature name to RaggedTensors.
    """

    def step_name_for(feature_name: str):
        """Turns a feature name into a valid prensor step name.

        A prensor step cannot contain dots ("."), but a feature name can,
        so every "." is replaced by "_dot_".
        """
        return feature_name.replace('.', '_dot_')

    def padded_step_name_for(feature_name: str):
        """Names the step that holds the default-filled copy of a feature."""
        return step_name_for(feature_name) + _DEFAULT_VALUE_SUFFIX

    def source_path_for(top_field: str, feature: Feature):
        """Builds the proto path to a feature's values below `top_field`."""
        list_name = _TYPE_LIST_MAP.get(feature.dtype)
        feature_step = 'feature[{}]'.format(feature.name)
        return path.Path(
            [top_field, 'features', feature_step, list_name, 'value'])

    def build_promote_and_project_maps(features: List[Feature],
                                       is_context: bool):
        """Returns (step name -> source path, feature name -> final path)."""
        if is_context:
            top_field = 'context'
            destination_prefix = []
        else:
            top_field = 'examples'
            destination_prefix = ['examples']

        promote_map = {}
        project_map = {}
        for feature in features:
            promote_map[step_name_for(feature.name)] = source_path_for(
                top_field, feature)
            # Features with a default value are read from their padded copy.
            if feature.default_value is None:
                leaf_name = step_name_for(feature.name)
            else:
                leaf_name = padded_step_name_for(feature.name)
            project_map[feature.name] = path.Path(
                destination_prefix + [leaf_name])
        return promote_map, project_map

    def make_padder(feature: Feature):
        """Creates the ragged-tensor operator that pads one feature.

        A factory is used so that each operator captures its own `feature`.
        """

        def pad_2d_ragged(rt):
            # Densify to [None, feature.length] with the default value,
            # then rebuild a ragged tensor with uniform row length.
            dense = rt.to_tensor(shape=[None, feature.length],
                                 default_value=feature.default_value)
            return tf.RaggedTensor.from_uniform_row_length(
                tf.reshape(dense, [-1]), feature.length, validate=False)

        return pad_2d_ragged

    context_promote_map, context_keys_to_paths = (
        build_promote_and_project_maps(context_features, is_context=True))
    examples_promote_map, examples_keys_to_paths = (
        build_promote_and_project_maps(example_features, is_context=False))

    # Build the struct2tensor query: promote context features to the root,
    # then example features to 'examples'.
    s2t_expr = proto_expr.create_expression_from_proto(
        records, input_pb2.ExampleListWithContext.DESCRIPTOR)
    s2t_expr = s2t_expr.promote_and_broadcast(context_promote_map,
                                              path.Path([]))
    s2t_expr = s2t_expr.promote_and_broadcast(examples_promote_map,
                                              path.Path(['examples']))

    # Pad features that have default_values specified.
    feature_groups = [(context_features, path.Path([])),
                      (example_features, path.Path(['examples']))]
    for features, parent_path in feature_groups:
        for feature in features:
            if feature.default_value is None:
                continue
            s2t_expr = s2t_expr.map_ragged_tensors(
                parent_path=parent_path,
                source_fields=[step_name_for(feature.name)],
                operator=make_padder(feature),
                is_repeated=True,
                dtype=feature.dtype,
                new_field_name=padded_step_name_for(feature.name))

    to_project = (list(context_keys_to_paths.values()) +
                  list(examples_keys_to_paths.values()))

    wants_size_feature = size_feature_name is not None
    if wants_size_feature:
        size_step = step_name_for(size_feature_name)
        s2t_expr = s2t_expr.create_size_field(path.Path(['examples']),
                                              size_step)
        to_project.append(path.Path([size_step]))

    projection = s2t_expr.project(to_project)

    options = calculate_options.get_options_with_minimal_checks()
    prensor_result = calculate.calculate_prensors([projection], options)[0]
    # A map from path.Path to RaggedTensors.
    ragged_by_path = prensor_util.get_ragged_tensors(prensor_result, options)

    # Context features are inserted first; an example feature with the same
    # name replaces the context entry.
    result = {}
    for keys_to_paths in (context_keys_to_paths, examples_keys_to_paths):
        for feature_name, promoted_path in keys_to_paths.items():
            result[feature_name] = ragged_by_path[promoted_path]

    if wants_size_feature:
        result[size_feature_name] = ragged_by_path[path.Path(
            [step_name_for(size_feature_name)])]

    return result