Exemplo n.º 1
0
  def _convert_internal(self, tensor: TensorAlike) -> List[pa.Array]:
    # Produces one ListArray for the sparse values followed by one ListArray
    # per entry of self._index_column_names, all grouped by the same parent
    # (row) indices.
    #
    # The indices are transposed and laid out contiguously so that every
    # original column can be sliced out as a row of `coo_by_dim`.
    coo_by_dim = np.ascontiguousarray(
        np.transpose(np.asarray(tensor.indices)), dtype=np.int64)

    # Column 0 of the original indices says which row each value belongs to.
    row_ids = pa.array(coo_by_dim[0, :], type=pa.int64())
    row_count = int(np.asarray(tensor.dense_shape)[0])

    def _group_by_row(flat_values: pa.Array) -> pa.Array:
      # Empty rows are emitted as empty lists, not nulls.
      return array_util.MakeListArrayFromParentIndicesAndValues(
          row_count, row_ids, flat_values, empty_list_as_null=False)

    values_column = _group_by_row(
        pa.array(np.asarray(tensor.values), type=self._values_arrow_type))

    # Index column k (1-based) comes from the k-th original indices column;
    # column 0 was already consumed as the parent indices above.
    index_columns = [
        _group_by_row(pa.array(coo_by_dim[dim, :], type=pa.int64()))
        for dim in range(1, len(self._index_column_names) + 1)
    ]

    return [values_column] + index_columns
Exemplo n.º 2
0
 def _convert_internal(self, tensor: TensorAlike) -> List[pa.Array]:
   # Returns a single-element list holding a ListArray built from the sparse
   # values of `tensor`.
   #
   # Preconditions:
   #   - COO indices are sorted (only partially verified by the assert below)
   #   - the SparseTensor is 2-D (checked in can_handle())
   #   - the SparseTensor is ragged
   # Under those preconditions the first column of the COO indices acts as
   # "parent indices": parent_index[i] == j means the i-th value belongs to
   # the j-th sub list. A C++ util then turns parent indices + values into a
   # ListArray.
   #
   # Caveat: the resulting ListArray doesn't explicitly store the second dense
   # dimension. When it is converted back to SparseTensor with tensor_adapter
   # the second dense dimension is recovered as an upper bound for
   # second indices + 1. Therefore, if SparseTensor's second dense dimension
   # is not tight, the composition
   # TensorAdapter(TensorsToRecordBatchConverter()) is not an identity.
   shape = np.asarray(tensor.dense_shape)
   coo = np.asarray(tensor.indices)
   row_ids = coo[:, 0]
   # `initial=0` keeps the check well-defined when there are fewer than two
   # indices (np.diff is then empty).
   assert np.min(np.diff(row_ids), initial=0) >= 0, (
       "The sparse indices must be sorted")

   num_rows = shape[0]
   parents = pa.array(row_ids, type=pa.int64())
   flat_values = pa.array(
       np.asarray(tensor.values), type=self._values_arrow_type)
   list_array = array_util.MakeListArrayFromParentIndicesAndValues(
       num_rows, parents, flat_values, empty_list_as_null=False)
   return [list_array]
Exemplo n.º 3
0
 def testMakeListArray(self, num_parents, parent_indices, values,
                       empty_list_as_null, expected):
     # Builds a ListArray from parent indices + values, validates it, and
     # compares it against the expected array.
     result = array_util.MakeListArrayFromParentIndicesAndValues(
         num_parents, parent_indices, values, empty_list_as_null)
     result.validate()
     # When empty lists are not turned into nulls, no null may appear.
     if not empty_list_as_null:
         self.assertEqual(result.null_count, 0)
     mismatch_message = "actual: {}, expected: {}".format(result, expected)
     self.assertTrue(result.equals(expected), mismatch_message)
Exemplo n.º 4
0
 def _convert_internal(self, tensor: TensorAlike) -> List[pa.Array]:
   # Returns a single-element list holding a ListArray built from the sparse
   # values of `tensor`.
   #
   # Preconditions:
   #   - COO indices are sorted (partially verified below)
   #   - the SparseTensor is 2-D (checked in can_handle())
   #   - the SparseTensor is ragged (partially verified below)
   # Under those preconditions the first column of the COO indices acts as
   # "parent indices": parent_index[i] == j means the i-th value belongs to
   # the j-th sub list. A C++ util then turns parent indices + values into a
   # ListArray.
   shape = np.asarray(tensor.dense_shape)
   coo = np.asarray(tensor.indices)
   # Raggedness check: unless there are no indices at all, the second dense
   # dimension must equal the largest second index plus one.
   assert coo.size == 0 or shape[1] == np.max(coo, axis=0)[1] + 1, (
       "SparseTensor is not 2-D ragged")
   row_ids = coo[:, 0]
   # `initial=0` keeps the check well-defined when np.diff is empty.
   assert np.min(np.diff(row_ids), initial=0) >= 0, (
       "The sparse indices must be sorted")

   num_rows = shape[0]
   parents = pa.array(row_ids, type=pa.int64())
   flat_values = pa.array(
       np.asarray(tensor.values), type=self._values_arrow_type)
   list_array = array_util.MakeListArrayFromParentIndicesAndValues(
       num_rows, parents, flat_values, empty_list_as_null=False)
   return [list_array]
Exemplo n.º 5
0
 def testMakeListArray(self, num_parents, parent_indices, values, expected):
   # Builds a ListArray from parent indices + values and compares it against
   # the expected array.
   list_array = array_util.MakeListArrayFromParentIndicesAndValues(
       num_parents, parent_indices, values)
   mismatch_message = "actual: {}, expected: {}".format(list_array, expected)
   self.assertTrue(list_array.equals(expected), mismatch_message)
Exemplo n.º 6
0
 def testInvalidInput(self, num_parents, parent_indices, values,
                      expected_error, expected_error_regexp):
   # The conversion must reject these inputs with `expected_error`, and the
   # error message must match `expected_error_regexp`.
   self.assertRaisesRegex(
       expected_error,
       expected_error_regexp,
       array_util.MakeListArrayFromParentIndicesAndValues,
       num_parents,
       parent_indices,
       values)