Python get_weight_feature примеры использования

Язык программирования: Python

Пространство имен/Пакет: tensorflow_data_validation.arrow.arrow_util

Метод/Функция: get_weight_feature

Примеров на hotexamples.com: 5

Python get_weight_feature — найдено 5 примеров. Это лучшие примеры Python-кода для tensorflow_data_validation.arrow.arrow_util.get_weight_feature, полученные из open-source-проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: arrow_util_test.py Проект: tanguycdls/data-validation

 def testGetWeightFeatureNullArray(self):
     # The weight column "w" holds only nulls, so the lookup is expected to
     # fail with a ValueError whose message matches the pattern below.
     expected_error = ('Weight column "w" cannot be '
                       r'null\.')
     with self.assertRaisesRegex(ValueError, expected_error):
         table = pa.Table.from_arrays(
             [pa.array([[1], [2]]), pa.array([None, None])],
             ["v", "w"])
         arrow_util.get_weight_feature(table, weight_column="w")

Пример #2

Показать файл

Файл: arrow_util_test.py Проект: tanguycdls/data-validation

 def testGetWeightFeatureNotFound(self):
     # The table only has columns "u" and "v", so asking for weight column "w"
     # is expected to raise a ValueError matching the pattern below.
     expected_error = r'Weight column "w" not present in the input table\.'
     with self.assertRaisesRegex(ValueError, expected_error):
         table = pa.Table.from_arrays(
             [pa.array([[1], [2]]), pa.array([[1], [3]])],
             ["u", "v"])
         arrow_util.get_weight_feature(table, weight_column="w")

Пример #3

Показать файл

 def testGetWeightFeatureTooManyValues(self):
   # The second example carries two weight values ([2, 2]); the lookup is
   # expected to raise a ValueError matching the pattern below.
   expected_error = (
       r'Weight column "w" must have exactly one value in each example\.')
   with self.assertRaisesRegex(ValueError, expected_error):
     record_batch = pa.RecordBatch.from_arrays(
         [pa.array([[1], [2, 3]]), pa.array([[1], [2, 2]])],
         ["v", "w"])
     arrow_util.get_weight_feature(record_batch, weight_column="w")

Пример #4

Показать файл

Файл: arrow_util_test.py Проект: tanguycdls/data-validation

 def testGetWeightFeatureMissingValue(self):
     # The second example has an empty weight list; the lookup is expected to
     # raise a ValueError matching the pattern below.
     expected_error = (
         r'Weight column "w" must have exactly one value in each example\.')
     with self.assertRaisesRegex(ValueError, expected_error):
         table = pa.Table.from_arrays(
             [pa.array([[1], [2]]), pa.array([[1], []])],
             ["v", "w"])
         arrow_util.get_weight_feature(table, weight_column="w")

Пример #5

Показать файл

def _get_example_value_presence(
        record_batch: pa.RecordBatch, path: types.FeaturePath,
        boundaries: Optional[Sequence[float]],
        weight_column_name: Optional[Text]) -> Optional[pd.DataFrame]:
    """Collects the distinct (example index, value) pairs found at `path`.

    The array at `path` is flattened, each flattened value is paired with the
    index of the example it came from, and duplicate pairs are dropped, so a
    value is reported at most once per example.

    Args:
        record_batch: The RecordBatch in which to look up the path.
        path: The FeaturePath for which to fetch values.
        boundaries: Optional bin boundaries. When given, the flattened values
            are passed through `bin_util.bin_array` and the resulting bins are
            reported in place of the raw values. Not allowed for binary-like
            columns.
        weight_column_name: Optional name of a weight column. When truthy, the
            per-example weight is attached to every returned row.

    Returns:
        A DataFrame indexed by 'example_indices' with a 'values' column (a
        pd.Categorical for binary-like input) and, if `weight_column_name` is
        truthy, a 'weights' column holding the weight of the example each row
        came from. Returns None when the arrow type of the array maps to no
        feature type, or when there are no (example, value) pairs at all.

    Raises:
        AssertionError: If `boundaries` is given for a binary-like column.
    """
    array, row_ids = arrow_util.get_array(
        record_batch, path, return_example_indices=True)
    feature_type = stats_util.get_feature_type_from_arrow_type(
        path, array.type)
    if feature_type is None:
        return None

    flat_values, parents = arrow_util.flatten_nested(
        array, return_parent_indices=True)
    binary_like = arrow_util.is_binary_like(flat_values.type)
    assert boundaries is None or not binary_like, (
        'Boundaries can only be applied to numeric columns')
    if binary_like:
        # Work on the integer dictionary codes so that np.unique does not have
        # to operate on object arrays; the dictionary is kept to rebuild the
        # categorical values afterwards.
        encoded = flat_values.dictionary_encode()
        flat_values = encoded.indices
        categories = np.asarray(encoded.dictionary)

    # Map every flattened value back to the example it belongs to.
    flat_row_ids = row_ids[parents]
    if boundaries is None:
        stacked = np.vstack([flat_row_ids, np.asarray(flat_values)])
    else:
        kept, bin_ids = bin_util.bin_array(flat_values, boundaries)
        stacked = np.vstack([flat_row_ids[kept], bin_ids])
    if stacked.size == 0:
        return None

    # Drop values repeated within the same example. This makes P(X=x|Y=y) in
    # the standard lift definition behave as P(x \in Xs | y \in Ys) when
    # examples contain more than one value of X and Y.
    deduped = np.unique(stacked, axis=1)
    unique_row_ids = deduped[0, :]
    unique_values = deduped[1, :]
    if binary_like:
        # A pd.Categorical keeps subsequent operations such as merges cheaper
        # than they would be on raw binary-like values.
        unique_values = pd.Categorical.from_codes(
            unique_values, categories=categories)

    frame_data = {'example_indices': unique_row_ids, 'values': unique_values}
    if weight_column_name:
        example_weights = arrow_util.get_weight_feature(
            record_batch, weight_column_name)
        frame_data['weights'] = np.asarray(example_weights)[unique_row_ids]
    return pd.DataFrame(frame_data).set_index('example_indices')