def structured_tensor_to_prensor(
    st: structured_tensor.StructuredTensor,
    default_field_name: path.Step = "data") -> prensor.Prensor:
  """Builds a Prensor that is logically equivalent to a StructuredTensor.

  Certain rank information must be known for the conversion to work. The
  transformation itself is described in the notes earlier in this module.

  Three cases are handled, in this order:
    * `st` has at least one row partition: the converted tensor is attached
      to a new root as a single child named `default_field_name`.
    * `st` has rank 1: its converted fields become the children of a new
      root directly.
    * otherwise `st` is a scalar: a leading dimension is added and the
      conversion is retried on the expanded tensor.

  Args:
    st: the structured tensor to convert.
    default_field_name: the name to use when there is an unnamed dimension.

  Returns:
    A logically equivalent Prensor.

  Raises:
    ValueError: if there is an issue with the structured tensor.
  """
  if len(st.row_partitions) >= 1:
    # Convert the partitioned tensor first, then hang it under the root using
    # the default field name.
    nested = _structured_tensor_to_child_prensor(st, default_field_name)
    root = prensor.RootNodeTensor(st.nrows())
    return prensor.create_prensor_from_root_and_children(
        root, {default_field_name: nested})

  if st.rank == 1:
    root = prensor.RootNodeTensor(st.nrows())
    children = _structured_tensor_prensor_map(st, default_field_name)
    return prensor.create_prensor_from_root_and_children(root, children)

  # st is a scalar StructuredTensor: give it a leading dimension and recurse.
  expanded = _expand_dims(st, 0)
  return structured_tensor_to_prensor(expanded, default_field_name)
def testMultipleColumnsTwoRowGroupsAndEqualBatchSize_OutputsPrensor(self):
  """Checks the first batch read for three value paths against a fixed prensor.

  The dataset is read from `self._rowgroup_test_filenames` with a batch size
  of 2. Only the first element yielded by the dataset is compared; the
  remaining elements are still iterated but not checked.
  """

  def _int64(value):
    # Every index/size tensor in the expected prensor is an int64 constant.
    return tf.constant(value, dtype=tf.int64)

  requested_paths = ["DocId", "Name.Language.Code", "Name.Language.Country"]
  dataset = parquet.ParquetDataset(
      filenames=self._rowgroup_test_filenames,
      value_paths=requested_paths,
      batch_size=2)

  # Insertion order mirrors root -> leaves.
  expected_nodes = {}
  expected_nodes[path.Path([])] = prensor.RootNodeTensor(_int64(2))
  expected_nodes[path.Path(["DocId"])] = prensor.LeafNodeTensor(
      _int64([0, 1]), _int64([10, 20]), True)
  expected_nodes[path.Path(["Name"])] = prensor.ChildNodeTensor(
      _int64([0, 0, 0, 1]), True)
  expected_nodes[path.Path(["Name", "Language"])] = prensor.ChildNodeTensor(
      _int64([0, 0, 2]), True)
  expected_nodes[path.Path(["Name", "Language", "Code"])] = (
      prensor.LeafNodeTensor(
          _int64([0, 1, 2]),
          tf.constant([b"en-us", b"en", b"en-gb"]),
          True))
  expected_nodes[path.Path(["Name", "Language", "Country"])] = (
      prensor.LeafNodeTensor(
          _int64([0, 2]),
          tf.constant([b"us", b"gb"]),
          True))
  expected = prensor.create_prensor_from_descendant_nodes(expected_nodes)

  for batch_index, batch in enumerate(dataset):
    if batch_index != 0:
      # Later batches are consumed but intentionally not compared.
      continue
    self._assertPrensorEqual(batch, expected)
def _as_root_node_tensor(node_tensor):
  """Returns a root node tensor corresponding to `node_tensor`.

  Args:
    node_tensor: a `prensor.RootNodeTensor` or `prensor.ChildNodeTensor`.

  Returns:
    `node_tensor` itself when it is already a root node; otherwise a new
    `prensor.RootNodeTensor` built from the child node's `size`.

  Raises:
    ValueError: if `node_tensor` is neither a root nor a child node tensor.
  """
  if isinstance(node_tensor, prensor.RootNodeTensor):
    # Already a root: hand it back unchanged.
    root = node_tensor
  elif isinstance(node_tensor, prensor.ChildNodeTensor):
    # Only the size of the child node carries over to the new root.
    root = prensor.RootNodeTensor(node_tensor.size)
  else:
    found_type = type(node_tensor)
    raise ValueError(
        "Must be child or root node tensor (found {})".format(found_type))
  return root
def testPromoteAndProjectExpression(self):
  """Promotes Name.Language.Code, projects it and DocId, and checks both.

  An expression is created from the dremel example parquet file. The
  `Name.Language.Code` path is promoted to `new_code`, and two projections
  (`Name.new_code` from the promoted expression and `DocId` from the original
  expression) are computed with a batch size of 2. Every element yielded by
  the resulting dataset is compared against the same pair of expected
  prensors.
  """

  def _int64(value):
    # Sizes and parent indices in the expected prensors are int64 constants.
    return tf.constant(value, dtype=tf.int64)

  parquet_files = [
      "struct2tensor/testdata/parquet_testdata/dremel_example.parquet"
  ]
  rows_per_batch = 2

  source_exp = parquet.create_expression_from_parquet_file(parquet_files)
  promoted_exp = promote.promote(
      source_exp, path.Path(["Name", "Language", "Code"]), "new_code")
  new_code_projection = project.project(
      promoted_exp, [path.Path(["Name", "new_code"])])
  docid_projection = project.project(source_exp, [path.Path(["DocId"])])

  dataset = parquet.calculate_parquet_values(
      [new_code_projection, docid_projection],
      source_exp,
      parquet_files,
      rows_per_batch)

  # Expected result of the promoted-and-projected `Name.new_code` path.
  new_code_nodes = {}
  new_code_nodes[path.Path([])] = prensor.RootNodeTensor(_int64(2))
  new_code_nodes[path.Path(["Name"])] = prensor.ChildNodeTensor(
      _int64([0, 0, 0, 1]), True)
  new_code_nodes[path.Path(["Name", "new_code"])] = prensor.LeafNodeTensor(
      _int64([0, 0, 2]),
      tf.constant([b"en-us", b"en", b"en-gb"]),
      True)
  new_code_expected = prensor.create_prensor_from_descendant_nodes(
      new_code_nodes)

  # Expected result of the projected `DocId` path.
  docid_nodes = {}
  docid_nodes[path.Path([])] = prensor.RootNodeTensor(_int64(2))
  docid_nodes[path.Path(["DocId"])] = prensor.LeafNodeTensor(
      _int64([0, 1]), _int64([10, 20]), False)
  docid_expected = prensor.create_prensor_from_descendant_nodes(docid_nodes)

  for batch in dataset:
    # Each element holds one prensor per requested projection, in the order
    # the projections were passed to calculate_parquet_values.
    actual_new_code, actual_docid = batch[0], batch[1]
    self._assertPrensorEqual(actual_new_code, new_code_expected)
    self._assertPrensorEqual(actual_docid, docid_expected)
def testPlaceholderExpression(self):
  """Feeds a nested prensor to a placeholder expression and checks a promote.

  A placeholder expression is created from a schema with `doc` and `user`
  children. `user.friends` is promoted to `new_friends`, projected, and
  evaluated with the nested test prensor supplied through `feed_dict`. The
  resulting node's `is_repeated`, `values` and `parent_index` are compared
  with the hand-built expected leaf.
  """
  new_friends_path = path.Path(["new_friends"])

  input_prensor = prensor_test_util.create_nested_prensor()

  expected_leaf = prensor.LeafNodeTensor(
      tf.constant([0, 1, 1, 1, 2], dtype=tf.int64),
      tf.constant(["a", "b", "c", "d", "e"], dtype=tf.string),
      True)
  expected_prensor = prensor.create_prensor_from_descendant_nodes({
      path.Path([]): prensor.RootNodeTensor(tf.constant(3, dtype=tf.int64)),
      new_friends_path: expected_leaf,
  })

  # Schema pieces, assembled bottom-up.
  doc_schema = {
      "is_repeated": True,
      "children": {
          "bar": {"is_repeated": True, "dtype": tf.string},
          "keep_me": {"is_repeated": False, "dtype": tf.bool},
      },
  }
  user_schema = {
      "is_repeated": True,
      "children": {
          "friends": {"is_repeated": True, "dtype": tf.string},
      },
  }
  schema = mpp.create_schema(
      is_repeated=True, children={"doc": doc_schema, "user": user_schema})

  placeholder_exp = placeholder.create_expression_from_schema(schema)
  promoted = promote.promote(
      placeholder_exp, path.Path(["user", "friends"]), "new_friends")
  projected = project.project(promoted, [new_friends_path])
  target_exp = projected.get_descendant(new_friends_path)

  # The placeholder root is bound to the concrete prensor at evaluation time.
  values = calculate.calculate_values(
      [target_exp], feed_dict={placeholder_exp: input_prensor})
  actual_node = values[0]
  wanted_node = expected_prensor.get_descendant(new_friends_path).node

  self.assertAllEqual(actual_node.is_repeated, wanted_node.is_repeated)
  self.assertAllEqual(actual_node.values, wanted_node.values)
  self.assertAllEqual(actual_node.parent_index, wanted_node.parent_index)
def testCreateExpressionFromSchema(self):
  """Evaluates a childless placeholder root fed with a root-only prensor.

  The schema has no children, so the expression is just a root. A prensor
  containing only a root node of size 1 is supplied through `feed_dict`, and
  the evaluated node's `is_repeated` and `size` are compared with that root.
  """
  empty_schema = mpp.create_schema(is_repeated=True, children={})
  root_exp = placeholder.create_expression_from_schema(empty_schema)

  root_only_nodes = {
      path.Path([]): prensor.RootNodeTensor(tf.constant(1, dtype=tf.int64)),
  }
  fed_prensor = prensor.create_prensor_from_descendant_nodes(root_only_nodes)

  values = calculate.calculate_values(
      [root_exp], feed_dict={root_exp: fed_prensor})
  actual_node = values[0]
  wanted_node = fed_prensor.get_descendant(path.Path([])).node

  self.assertAllEqual(actual_node.is_repeated, wanted_node.is_repeated)
  self.assertAllEqual(actual_node.size, wanted_node.size)
def create_root_node(size: int) -> prensor.RootNodeTensor:
  """Creates a root node tensor from a Python integer size.

  Args:
    size: the value wrapped in an int64 constant and passed to
      `prensor.RootNodeTensor`.

  Returns:
    A `prensor.RootNodeTensor` constructed from that int64 constant.
  """
  size_tensor = tf.constant(size, dtype=tf.int64)
  return prensor.RootNodeTensor(size_tensor)