Exemplos de get_boundaries em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: tensorflow_data_validation.utils.bin_util

Método / Função: get_boundaries

Exemplos em hotexamples.com: 2

get_boundaries em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de tensorflow_data_validation.utils.bin_util.get_boundaries em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Exemplo n.º 1

0

Exibir arquivo

def _make_dataset_feature_stats_proto(
    lifts: Tuple[_SlicedFeatureKey, _LiftSeries],
    y_path: types.FeaturePath,
    y_boundaries: Optional[np.ndarray]
) -> Tuple[types.SliceKey, statistics_pb2.DatasetFeatureStatistics]:
    """Builds the DatasetFeatureStatistics proto for one (x_path, y_path) pair.

    Args:
      lifts: A (key, lift series collection) pair produced by two successive
        group-bys of lift values. The inner grouping gathers every lift value
        of one (slice, x_path, y_value) tuple, i.e. one LiftSeries message.
        The outer grouping gathers all LiftSeries sharing the same
        (slice, x_path) tuple. Overall shape:
          (slice, x_path), [(y, y_count, [(x, lift, xy_count, x_count)])]
        The collection is iterated in sorted order.
      y_path: The path used as Y in the lift expression:
        lift = P(Y=y|X=x) / P(Y=y).
      y_boundaries: Optional bin boundaries used for binning y_path values.
        When given, every y is reported as a bucket rather than a raw value.

    Returns:
      A (slice key, populated DatasetFeatureStatistics proto) tuple.
    """
    sliced_key, series_group = lifts
    result = statistics_pb2.DatasetFeatureStatistics()
    cross_entry = result.cross_features.add(
        path_x=sliced_key.x_path.to_proto(),
        path_y=y_path.to_proto())
    lift_stats = cross_entry.categorical_cross_stats.lift

    def _by_lift_desc_then_x(entry):
        # Highest lift first; ties are broken by ascending x.
        return (-entry.lift, entry.x)

    for series in sorted(series_group):
        series_proto = lift_stats.lift_series.add(y_count=series.y_count)

        # Record y as a bucket, a string, or an int.
        y = series.y
        if y_boundaries is not None:
            bucket_low, bucket_high = bin_util.get_boundaries(y, y_boundaries)
            series_proto.y_bucket.low_value = bucket_low
            series_proto.y_bucket.high_value = bucket_high
        elif isinstance(y, six.string_types):
            series_proto.y_string = y
        else:
            series_proto.y_int = y

        # The top_k and bottom_k x values may overlap; keep one entry per x
        # (the last one seen wins).
        unique_by_x = {}
        for entry in series.lift_values:
            unique_by_x[entry.x] = entry

        ordered_entries = list(unique_by_x.values())
        ordered_entries.sort(key=_by_lift_desc_then_x)

        for entry in ordered_entries:
            entry_proto = series_proto.lift_values.add(
                lift=entry.lift,
                x_count=entry.x_count,
                x_and_y_count=entry.xy_count)
            x = entry.x
            if isinstance(x, six.string_types):
                entry_proto.x_string = x
            else:
                entry_proto.x_int = x

    return sliced_key.slice_key, result

Exemplo n.º 2

0

Exibir arquivo

def _make_dataset_feature_stats_proto(
    lifts: Tuple[_SlicedFeatureKey, Iterable[_LiftSeries]],
    y_path: types.FeaturePath,
    y_boundaries: Optional[np.ndarray],
    weighted_examples: bool,
    output_custom_stats: bool
) -> Tuple[types.SliceKey, statistics_pb2.DatasetFeatureStatistics]:
    """Builds the DatasetFeatureStatistics proto for one (x_path, y_path) pair.

    Args:
      lifts: A (key, lift series collection) pair produced by two successive
        group-bys of lift values. The inner grouping gathers every lift value
        of one (slice, x_path, y_value) tuple, i.e. one LiftSeries message.
        The outer grouping gathers all LiftSeries sharing the same
        (slice, x_path) tuple. Overall shape:
          (slice, x_path), [(y, y_count, [(x, lift, xy_count, x_count)])]
        The collection is iterated in sorted order.
      y_path: The path used as Y in the lift expression:
        lift = P(Y=y|X=x) / P(Y=y).
      y_boundaries: Optional bin boundaries used for binning y_path values.
        Only applied to y values that are instances of int.
      weighted_examples: Whether lift is computed over weighted examples, in
        which case the weighted count fields of the proto are populated
        instead of the plain count fields.
      output_custom_stats: Whether to also emit custom stats (one rank
        histogram per y value) for use with Facets.

    Returns:
      A (slice key, populated DatasetFeatureStatistics proto) tuple.
    """
    sliced_key, series_group = lifts
    result = statistics_pb2.DatasetFeatureStatistics()
    cross_entry = result.cross_features.add(
        path_x=sliced_key.x_path.to_proto(),
        path_y=y_path.to_proto())
    if output_custom_stats:
        # Custom stats are attached to a per-feature entry for x_path.
        feature_entry = result.features.add(path=sliced_key.x_path.to_proto())

    def _by_lift_desc_then_x(entry):
        # Highest lift first; ties are broken by ascending x.
        return (-entry.lift, entry.x)

    for series in sorted(series_group):
        series_proto = (
            cross_entry.categorical_cross_stats.lift.lift_series.add())
        if weighted_examples:
            series_proto.weighted_y_count = series.y_count
        else:
            series_proto.y_count = series.y_count

        # Record y as a bucket, a string, or an int, and derive the label
        # used for the custom-stat name.
        y = series.y
        if y_boundaries is not None and isinstance(y, int):
            bucket_low, bucket_high = bin_util.get_boundaries(y, y_boundaries)
            series_proto.y_bucket.low_value = bucket_low
            series_proto.y_bucket.high_value = bucket_high
            # A bucket whose upper bound is infinite is rendered closed on
            # the right; every other bucket is rendered half-open.
            if bucket_high == float('inf'):
                bucket_fmt = '[{},{}]'
            else:
                bucket_fmt = '[{},{})'
            y_label = bucket_fmt.format(bucket_low, bucket_high)
        elif isinstance(y, six.text_type):
            series_proto.y_string = y
            y_label = y
        elif isinstance(y, six.binary_type):
            decoded_y = _get_unicode_value(y, y_path)
            series_proto.y_string = decoded_y
            y_label = decoded_y
        else:
            series_proto.y_int = y
            y_label = str(y)

        if output_custom_stats:
            custom_stat = feature_entry.custom_stats.add(
                name='Lift (Y={})'.format(y_label))
            histogram = custom_stat.rank_histogram

        # The top_k and bottom_k x values may overlap; keep one entry per x
        # (the last one seen wins).
        unique_by_x = {}
        for entry in series.lift_values:
            unique_by_x[entry.x] = entry

        ordered_entries = list(unique_by_x.values())
        ordered_entries.sort(key=_by_lift_desc_then_x)

        for entry in ordered_entries:
            entry_proto = series_proto.lift_values.add(lift=entry.lift)
            if weighted_examples:
                entry_proto.weighted_x_count = entry.x_count
                entry_proto.weighted_x_and_y_count = entry.xy_count
            else:
                entry_proto.x_count = entry.x_count
                entry_proto.x_and_y_count = entry.xy_count

            x = entry.x
            if isinstance(x, six.text_type):
                entry_proto.x_string = x
                x_label = x
            elif isinstance(x, six.binary_type):
                decoded_x = _get_unicode_value(x, sliced_key.x_path)
                entry_proto.x_string = decoded_x
                x_label = decoded_x
            else:
                entry_proto.x_int = x
                x_label = str(x)

            if output_custom_stats:
                # The lift value itself is stored as the bucket's
                # sample_count.
                histogram.buckets.add(label=x_label, sample_count=entry.lift)

    return sliced_key.slice_key, result