def test_create_counter_distribution(self):
    """Counter and distribution updates reach the scoped state's container.

    The sampler is stopped in a ``finally`` clause so that a failing
    assertion cannot leave it started for the tests that run afterwards.
    """
    sampler = statesampler.StateSampler('', counters.CounterFactory())
    statesampler.set_current_tracker(sampler)
    state1 = sampler.scoped_state(
        'mystep', 'myState', metrics_container=MetricsContainer('mystep'))

    try:
        sampler.start()
        with state1:
            counter_ns = 'aCounterNamespace'
            distro_ns = 'aDistributionNamespace'
            name = 'a_name'
            counter = Metrics.counter(counter_ns, name)
            distro = Metrics.distribution(distro_ns, name)
            counter.inc(10)
            counter.dec(3)
            distro.update(10)
            distro.update(2)
            self.assertIsInstance(counter, Metrics.DelegatingCounter)
            self.assertIsInstance(distro, Metrics.DelegatingDistribution)

            # Drop the delegates; the values are read back from the
            # container below, not from the delegate objects.
            del distro
            del counter

            container = MetricsEnvironment.current_container()
            self.assertEqual(
                container.counters[
                    MetricName(counter_ns, name)].get_cumulative(),
                7)
            self.assertEqual(
                container.distributions[
                    MetricName(distro_ns, name)].get_cumulative(),
                DistributionData(12, 2, 2, 10))
    finally:
        sampler.stop()
def test_create_counter_distribution(self):
    """Counter, distribution and gauge delegates write to the current container."""
    MetricsEnvironment.set_current_container(MetricsContainer('mystep'))

    counter_ns = 'aCounterNamespace'
    distro_ns = 'aDistributionNamespace'
    gauge_ns = 'aGaugeNamespace'
    name = 'a_name'

    counter = Metrics.counter(counter_ns, name)
    distro = Metrics.distribution(distro_ns, name)
    gauge = Metrics.gauge(gauge_ns, name)

    counter.inc(10)
    counter.dec(3)
    distro.update(10)
    distro.update(2)
    gauge.set(10)

    for delegate, delegate_type in (
            (counter, Metrics.DelegatingCounter),
            (distro, Metrics.DelegatingDistribution),
            (gauge, Metrics.DelegatingGauge)):
        self.assertTrue(isinstance(delegate, delegate_type))
    # Release the loop variable too, so no delegate stays referenced.
    del delegate

    del distro
    del counter
    del gauge

    # Values are read back from the container, keyed by MetricName.
    container = MetricsEnvironment.current_container()
    counter_cell = container.counters[MetricName(counter_ns, name)]
    self.assertEqual(counter_cell.get_cumulative(), 7)

    distro_cell = container.distributions[MetricName(distro_ns, name)]
    self.assertEqual(
        distro_cell.get_cumulative(), DistributionData(12, 2, 2, 10))

    gauge_cell = container.gauges[MetricName(gauge_ns, name)]
    self.assertEqual(gauge_cell.get_cumulative().value, 10)
def process(self, element):
    """Record ``element`` in three metrics and pass it through.

    Sets the 'latest_element' gauge to ``element``, increments the
    'elements' counter and updates the 'element_dist' distribution, all
    namespaced by this instance's class.

    Returns:
      A one-item list containing ``element``.
    """
    namespace = self.__class__
    Metrics.gauge(namespace, 'latest_element').set(element)
    Metrics.counter(namespace, 'elements').inc()
    Metrics.distribution(namespace, 'element_dist').update(element)
    return [element]
def __init__(self, project, fixed_batch_size=None):
    """Store the write settings and create the RPC outcome counters.

    Args:
      project: str, the cloud project id.
      fixed_batch_size: int, for testing only; forces all batches of
        writes to be a fixed size, for easier unittesting.
    """
    self._project = project
    self._datastore = None
    self._fixed_batch_size = fixed_batch_size

    # Both counters share the DatastoreWriteFn namespace.
    metrics_namespace = _Mutate.DatastoreWriteFn
    self._rpc_successes = Metrics.counter(
        metrics_namespace, "datastoreRpcSuccesses")
    self._rpc_errors = Metrics.counter(
        metrics_namespace, "datastoreRpcErrors")
def __init__(self, project):
    """Store the project id and create the metrics and throttler.

    Args:
      project: (str) cloud project id
    """
    self._project = project
    self._client = None

    # All three counters share the DatastoreMutateFn namespace.
    metrics_namespace = _Mutate.DatastoreMutateFn
    self._rpc_successes = Metrics.counter(
        metrics_namespace, "datastoreRpcSuccesses")
    self._rpc_errors = Metrics.counter(
        metrics_namespace, "datastoreRpcErrors")
    self._throttled_secs = Metrics.counter(
        metrics_namespace, "cumulativeThrottlingSeconds")

    self._throttler = AdaptiveThrottler(
        window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
def __init__(self, gs_path, filename, runner):
    """Load the 'imdb' metadata file and build the list of usable records.

    The file obtained via ``download_gs(gs_path, filename)`` is read with
    ``loadmat``. Each kept record is a ``(path, gender, age)`` tuple;
    records are kept only when ``check_face(face_score, age)`` is truthy.

    Args:
      gs_path: forwarded to ``download_gs``.
      filename: forwarded to ``download_gs``.
      runner: when equal to the string 'DirectRunner', only the first 10
        records are kept.
    """
    self._filename = download_gs(gs_path, filename)
    _meta = loadmat(self._filename)
    db = 'imdb'
    logger.info('loadmat end')

    record = _meta[db][0, 0]
    full_path = record["full_path"][0]
    dob = record["dob"][0]  # Matlab serial date number
    gender = record["gender"][0]
    photo_taken = record["photo_taken"][0]  # year
    face_score = record["face_score"][0]

    length = len(dob)
    logger.info('data length=%s' % length)

    age = []
    for idx in range(length):
        age.append(calc_age(photo_taken[idx], dob[idx]))
    logger.info('self etc end')

    kept = []
    for idx in range(length):
        if check_face(face_score[idx], age[idx]):
            kept.append((full_path[idx][0].strip(), gender[idx], age[idx]))
    self.value = kept

    logger.info('runner=%s' % type(runner))
    # NOTE(review): the log above prints type(runner) while the check below
    # compares against a string -- confirm callers pass the runner name.
    if runner == 'DirectRunner':
        self.value = self.value[:10]

    self.records_read = Metrics.counter(self.__class__, 'recordsRead')
    self._count = len(self.value)
    logger.info('final length=%s' % self._count)
    logger.info('init end')
def Count(name, counter_value_fn):
    """Build a pass-through ``beam.Map`` labelled ``name`` that counts elements.

    Args:
      name: used both as the counter name (namespace 'Count') and as the
        label applied to the returned transform.
      counter_value_fn: optional callable mapping an element to the
        increment amount; when falsy, each element increments by 1.

    Returns:
      ``name >> beam.Map(...)`` whose function returns each element unchanged.
    """
    counter = Metrics.counter('Count', name)

    def wrapper(x):
        if counter_value_fn:
            amount = counter_value_fn(x)
        else:
            amount = 1
        counter.inc(amount)
        return x

    return name >> beam.Map(wrapper)
def __init__(self, project, fixed_batch_size=None):
    """Store the write settings and create the metrics and throttler.

    Args:
      project: str, the cloud project id.
      fixed_batch_size: int, for testing only; forces all batches of
        writes to be a fixed size, for easier unittesting.
    """
    self._project = project
    self._datastore = None
    self._fixed_batch_size = fixed_batch_size

    # All three counters share the DatastoreWriteFn namespace.
    metrics_namespace = _Mutate.DatastoreWriteFn
    self._rpc_successes = Metrics.counter(
        metrics_namespace, "datastoreRpcSuccesses")
    self._rpc_errors = Metrics.counter(
        metrics_namespace, "datastoreRpcErrors")
    self._throttled_secs = Metrics.counter(
        metrics_namespace, "cumulativeThrottlingSeconds")

    self._throttler = AdaptiveThrottler(
        window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
def test_run_api(self):
    """Run a small transform through ``DirectRunner.run`` and check a counter."""
    my_metric = Metrics.counter('namespace', 'my_metric')
    runner = DirectRunner()

    transform = (
        beam.Create([1, 10, 100])
        | beam.Map(lambda x: my_metric.inc(x)))
    result = runner.run(transform)
    result.wait_until_finish()

    # Use counters to assert the pipeline actually ran: 1 + 10 + 100.
    counter_results = result.metrics().query()['counters']
    self.assertEqual(counter_results[0].committed, 111)
def execute_or_skip_step(step):
    """Wrap ``step`` so it only runs on items whose 'type' it supports.

    Args:
      step: callable pipeline step exposing ``get_supported_types()``.

    Returns:
      A function taking an item ``x``. When ``x['type']`` is supported it
      returns ``extend_dict(x, step(x))`` and increments the step's
      processed counter; otherwise it returns ``x`` unchanged and
      increments the step's ignored counter.
    """
    supported_types = step.get_supported_types()
    processed_counter = Metrics.counter(
        'PipelineStep', get_step_processed_counter(step))
    ignored_counter = Metrics.counter(
        'PipelineStep', get_step_ignored_counter(step))

    def wrapper(x):
        data_type = x['type']

        # Unsupported type: count it and hand the item back untouched.
        if data_type not in supported_types:
            get_logger().debug(
                'skipping step %s, %s not in supported types (%s)',
                step, data_type, supported_types
            )
            ignored_counter.inc()
            return x

        get_logger().debug(
            'excuting step %s: %s (%s)', step, x.keys(), data_type)
        result = extend_dict(x, step(x))
        get_logger().debug(
            'result of step %s: %s (%s)',
            step, result.keys(), result.get('type'))
        processed_counter.inc()
        return result

    return wrapper
def __init__(self, num_workers, *unused_args, **unused_kwargs):
    """Initializes a ramp-up throttler transform.

    Args:
      num_workers: A hint for the expected number of workers, used to
        derive the local rate limit.
      *unused_args: forwarded to the parent initializer.
      **unused_kwargs: forwarded to the parent initializer.
    """
    super().__init__(*unused_args, **unused_kwargs)

    self._num_workers = num_workers
    # Single-bucket moving sum: window and bucket are both 1000 ms.
    self._successful_ops = util.MovingSum(window_ms=1000, bucket_ms=1000)
    # Timestamp captured at construction time.
    self._first_instant = datetime.datetime.now()
    self._throttled_secs = Metrics.counter(
        RampupThrottlingFn, "cumulativeThrottlingSeconds")
def test_scoped_container(self):
    """Nested ``ScopedMetricsContainer`` blocks route updates correctly.

    Updates made inside the inner scope go to ``c2``; once the inner scope
    exits, ``c1`` is the current container again and accumulates 2 + 4.
    """
    c1 = MetricsContainer('mystep')
    c2 = MetricsContainer('myinternalstep')
    with ScopedMetricsContainer(c1):
        self.assertEqual(c1, MetricsEnvironment.current_container())
        counter = Metrics.counter('ns', 'name')
        counter.inc(2)

        with ScopedMetricsContainer(c2):
            self.assertEqual(c2, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(3)
            # items() is materialized with list(): on Python 3 a dict view
            # never compares equal to a list, so the bare comparison fails.
            self.assertEqual(
                list(c2.get_cumulative().counters.items()),
                [(MetricKey('myinternalstep', MetricName('ns', 'name')), 3)])

        self.assertEqual(c1, MetricsEnvironment.current_container())
        counter = Metrics.counter('ns', 'name')
        counter.inc(4)
        self.assertEqual(
            list(c1.get_cumulative().counters.items()),
            [(MetricKey('mystep', MetricName('ns', 'name')), 6)])
def test_scoped_container(self):
    """Nested ``ScopedMetricsContainer`` blocks route updates correctly.

    The inner scope's increment lands in ``c2`` only; the two increments
    made directly under the outer scope (2 and 4) land in ``c1``.
    """
    c1 = MetricsContainer('mystep')
    c2 = MetricsContainer('myinternalstep')

    with ScopedMetricsContainer(c1):
        self.assertEqual(c1, MetricsEnvironment.current_container())
        Metrics.counter('ns', 'name').inc(2)

        with ScopedMetricsContainer(c2):
            self.assertEqual(c2, MetricsEnvironment.current_container())
            Metrics.counter('ns', 'name').inc(3)
            inner_items = list(c2.get_cumulative().counters.items())
            self.assertEqual(
                inner_items,
                [(MetricKey('myinternalstep', MetricName('ns', 'name')), 3)])

        # Leaving the inner scope restores c1 as the current container.
        self.assertEqual(c1, MetricsEnvironment.current_container())
        Metrics.counter('ns', 'name').inc(4)
        outer_items = list(c1.get_cumulative().counters.items())
        self.assertEqual(
            outer_items,
            [(MetricKey('mystep', MetricName('ns', 'name')), 6)])
def test_log_metrics(self, mock_logger):
    """``MetricLogger.log_metrics`` emits one info line with the histogram summary."""

    class Contains(str):
        """String that compares equal to any value containing it."""

        def __eq__(self, other):
            return self in other

    metric_logger = MetricLogger()
    # A negative minimum frequency is set before calling log_metrics().
    metric_logger.minimum_logging_frequency_msec = -1

    metric_name = MetricName(
        Metrics.get_namespace(self.__class__), 'metric_logger_test')
    cell_factory = HistogramCellFactory(LinearBucket(0, 1, 10))
    metric_logger.update(cell_factory, metric_name, 1)
    metric_logger.log_metrics()

    expected = Contains(
        'HistogramData(Total count: 1, P99: 2, P90: 2, P50: 2)')
    mock_logger.info.assert_called_once_with(expected)
def MapOrLog(fn, log_fn=None, error_count=None):
    """Build a ``beam.FlatMap`` that applies ``fn`` and logs failures.

    Args:
      fn: callable applied to each element; its result is emitted.
      log_fn: called as ``log_fn(exception, element)`` when ``fn`` raises;
        defaults to ``_default_exception_log_fn``.
      error_count: optional counter name (namespace 'MapOrLog'); when
        truthy, the counter is incremented for every failed element.

    Returns:
      A ``beam.FlatMap`` that emits ``fn(x)`` for elements that succeed and
      nothing for elements whose processing raised.
    """
    if log_fn is None:
        log_fn = _default_exception_log_fn

    if error_count:
        error_counter = Metrics.counter('MapOrLog', error_count)
    else:
        error_counter = None

    def wrapper(x):
        try:
            yield fn(x)
        except Exception as e:  # pylint: disable=broad-except
            if error_counter:
                error_counter.inc()
            log_fn(e, x)

    return beam.FlatMap(wrapper)
def test_create_counter_distribution(self):
    """Counter and distribution delegates write to the current container."""
    MetricsEnvironment.set_current_container(MetricsContainer('mystep'))

    counter_ns = 'aCounterNamespace'
    distro_ns = 'aDistributionNamespace'
    name = 'a_name'

    counter = Metrics.counter(counter_ns, name)
    distro = Metrics.distribution(distro_ns, name)

    counter.inc(10)
    counter.dec(3)
    distro.update(10)
    distro.update(2)

    self.assertTrue(isinstance(counter, Metrics.DelegatingCounter))
    self.assertTrue(isinstance(distro, Metrics.DelegatingDistribution))

    del distro
    del counter

    # Values are read back from the container, keyed by MetricName.
    container = MetricsEnvironment.current_container()
    counter_cell = container.counters[MetricName(counter_ns, name)]
    self.assertEqual(counter_cell.get_cumulative(), 7)

    distro_cell = container.distributions[MetricName(distro_ns, name)]
    self.assertEqual(
        distro_cell.get_cumulative(), DistributionData(12, 2, 2, 10))
def test_create_counter_distribution(self):
    """Counter and distribution updates reach the scoped state's container.

    The sampler is always stopped, even when an assertion fails.
    """
    sampler = statesampler.StateSampler('', counters.CounterFactory())
    statesampler.set_current_tracker(sampler)
    state1 = sampler.scoped_state(
        'mystep', 'myState', metrics_container=MetricsContainer('mystep'))

    try:
        sampler.start()
        with state1:
            counter_ns = 'aCounterNamespace'
            distro_ns = 'aDistributionNamespace'
            name = 'a_name'

            counter = Metrics.counter(counter_ns, name)
            distro = Metrics.distribution(distro_ns, name)

            counter.inc(10)
            counter.dec(3)
            distro.update(10)
            distro.update(2)

            self.assertTrue(isinstance(counter, Metrics.DelegatingCounter))
            self.assertTrue(
                isinstance(distro, Metrics.DelegatingDistribution))

            del distro
            del counter

            # Values are read back from the container, keyed by MetricName.
            container = MetricsEnvironment.current_container()
            counter_cell = container.get_counter(
                MetricName(counter_ns, name))
            self.assertEqual(counter_cell.get_cumulative(), 7)

            distro_cell = container.get_distribution(
                MetricName(distro_ns, name))
            self.assertEqual(
                distro_cell.get_cumulative(),
                DistributionData(12, 2, 2, 10))
    finally:
        sampler.stop()
def test_uses_right_container(self):
    """One counter delegate updates whichever container is current."""
    c1 = MetricsContainer('step1')
    c2 = MetricsContainer('step2')
    counter = Metrics.counter('ns', 'name')

    MetricsEnvironment.set_current_container(c1)
    counter.inc()
    MetricsEnvironment.set_current_container(c2)
    counter.inc(3)
    MetricsEnvironment.unset_current_container()

    # Each container holds only the increment made while it was current.
    step1_items = list(c1.get_cumulative().counters.items())
    self.assertEqual(
        step1_items,
        [(MetricKey('step1', MetricName('ns', 'name')), 1)])

    step2_items = list(c2.get_cumulative().counters.items())
    self.assertEqual(
        step2_items,
        [(MetricKey('step2', MetricName('ns', 'name')), 3)])
def histogram(namespace, name, bucket_type, logger=None):
    # type: (Union[Type, str], str, BucketType, Optional[MetricLogger]) -> Metrics.DelegatingHistogram

    """Obtains or creates a Histogram metric.

    Args:
      namespace: A class or string that gives the namespace to a metric;
        resolved through ``UserMetrics.get_namespace``.
      name: A string that gives a unique name to a metric.
      bucket_type: A type of bucket used in a histogram. A subclass of
        apache_beam.utils.histogram.BucketType.
      logger: MetricLogger for logging locally aggregated metric.

    Returns:
      A ``Metrics.DelegatingHistogram`` for the resolved metric name.
    """
    resolved_namespace = UserMetrics.get_namespace(namespace)
    metric_name = MetricName(resolved_namespace, name)
    return Metrics.DelegatingHistogram(metric_name, bucket_type, logger)
def test_uses_right_container(self):
    """One counter delegate updates whichever container is current.

    The counter is created before any container is set; each increment is
    expected to land in the container that is current when ``inc`` runs.
    """
    c1 = MetricsContainer('step1')
    c2 = MetricsContainer('step2')
    counter = Metrics.counter('ns', 'name')
    MetricsEnvironment.set_current_container(c1)
    counter.inc()
    MetricsEnvironment.set_current_container(c2)
    counter.inc(3)
    MetricsEnvironment.unset_current_container()

    # items() is materialized with list(): on Python 3 a dict view never
    # compares equal to a list, so the bare comparison always fails.
    self.assertEqual(
        list(c1.get_cumulative().counters.items()),
        [(MetricKey('step1', MetricName('ns', 'name')), 1)])

    self.assertEqual(
        list(c2.get_cumulative().counters.items()),
        [(MetricKey('step2', MetricName('ns', 'name')), 3)])
def counter(urn, labels=None, process_wide=False):
    # type: (str, Optional[Dict[str, str]], bool) -> UserMetrics.DelegatingCounter

    """Obtains or creates a Counter metric identified by URN and labels.

    The metric name is built with ``namespace=None`` and ``name=None``;
    only ``urn`` and ``labels`` identify it.

    Args:
      urn: URN to populate on a MonitoringInfo, when sending to
        RunnerHarness.
      labels: Labels to populate on a MonitoringInfo.
      process_wide: Whether or not the metric is specific to the current
        bundle or should be calculated for the entire process.

    Returns:
      A Counter object.
    """
    metric_name = MetricName(
        namespace=None, name=None, urn=urn, labels=labels)
    return UserMetrics.DelegatingCounter(
        metric_name, process_wide=process_wide)
class GcsIOOverrides(object):
    """Functions for overriding Google Cloud Storage I/O client."""

    _THROTTLED_SECS = Metrics.counter(
        'StorageV1', "cumulativeThrottlingSeconds")

    @classmethod
    def retry_func(cls, retry_args):
        """Retry handler that backs off on HTTP "too many requests" errors.

        Any other error is delegated to
        ``http_wrapper.HandleExceptionsAndRebuildHttpConnections`` and its
        result is returned. For a quota error the HTTP connections are
        rebuilt, the wait time is added (rounded up) to the throttling
        counter, and the call sleeps for that long.

        Args:
          retry_args: retry context; this method reads its ``exc``,
            ``http``, ``http_request``, ``num_retries`` and
            ``max_retry_wait`` attributes.
        """
        is_quota_error = (
            isinstance(retry_args.exc, exceptions.BadStatusCodeError) and
            retry_args.exc.status_code == http_wrapper.TOO_MANY_REQUESTS)
        if not is_quota_error:
            return http_wrapper.HandleExceptionsAndRebuildHttpConnections(
                retry_args)

        # handling GCS download throttling errors (BEAM-7424)
        _LOGGER.debug(
            'Caught GCS quota error (%s), retrying.',
            retry_args.exc.status_code)

        http_wrapper.RebuildHttpConnections(retry_args.http)
        _LOGGER.debug(
            'Retrying request to url %s after exception %s',
            retry_args.http_request.url,
            retry_args.exc)

        sleep_seconds = util.CalculateWaitForRetry(
            retry_args.num_retries, max_wait=retry_args.max_retry_wait)
        throttled_whole_seconds = math.ceil(sleep_seconds)
        cls._THROTTLED_SECS.inc(throttled_whole_seconds)
        time.sleep(sleep_seconds)
def distribution(self, name: str) -> 'Distribution':
    """Return a ``Distribution`` wrapping the Beam distribution ``name``.

    The Beam metric is created under ``self._get_namespace()``.
    """
    from apache_beam.metrics.metric import Metrics

    namespace = self._get_namespace()
    beam_distribution = Metrics.distribution(namespace, name)
    return Distribution(beam_distribution)
def meter(self, name: str, time_span_in_seconds: int = 60) -> 'Meter':
    """Return a ``Meter`` backed by a Beam counter called ``name``.

    ``time_span_in_seconds`` is passed to ``self._get_namespace`` when the
    counter's namespace is computed.
    """
    from apache_beam.metrics.metric import Metrics

    # There is no meter type in Beam, use counter to implement meter
    namespace = self._get_namespace(time_span_in_seconds)
    beam_counter = Metrics.counter(namespace, name)
    return Meter(beam_counter)
def gauge(self, name: str, obj: Callable[[], int]) -> None:
    """Register ``obj`` as gauge ``name`` and create the matching Beam gauge.

    ``obj`` is stored in ``self._flink_gauge`` and a Beam gauge created
    under ``self._get_namespace()`` is stored in ``self._beam_gauge``,
    both keyed by ``name``.
    """
    from apache_beam.metrics.metric import Metrics

    self._flink_gauge[name] = obj
    namespace = self._get_namespace()
    beam_gauge = Metrics.gauge(namespace, name)
    self._beam_gauge[name] = beam_gauge
def counter(self, name: str) -> 'Counter':
    """Return a ``Counter`` wrapping the Beam counter ``name``.

    The Beam metric is created under ``self._get_namespace()``.
    """
    from apache_beam.metrics.metric import Metrics

    namespace = self._get_namespace()
    beam_counter = Metrics.counter(namespace, name)
    return Counter(beam_counter)
def __init__(self):
    """Initialize the DoFn base and create the 'num_parse_errors' counter."""
    # TODO(BEAM-6158): Revert the workaround once we can pickle super() on py3.
    # super(ParseGameEventFn, self).__init__()
    beam.DoFn.__init__(self)

    namespace = self.__class__
    self.num_parse_errors = Metrics.counter(namespace, 'num_parse_errors')
def test_get_namespace_string(self):
    """A string namespace is returned unchanged by ``Metrics.get_namespace``."""
    namespace = 'MyNamespace'
    resolved = Metrics.get_namespace(namespace)
    self.assertEqual(namespace, resolved)
def __init__(self):
    """Run the parent initializer and create the 'num_parse_errors' counter."""
    super(ParseGameEventFn, self).__init__()

    namespace = self.__class__
    self.num_parse_errors = Metrics.counter(namespace, 'num_parse_errors')
def finish_bundle(self):
    """Increment the 'finished_bundles' counter namespaced by this class."""
    Metrics.counter(self.__class__, 'finished_bundles').inc()
def test_distribution_empty_namespace(self):
    """Requesting a distribution with an empty namespace raises ValueError."""
    self.assertRaises(ValueError, Metrics.distribution, "", "names")
def __init__(self):
    """Initialize the DoFn base and create the 'num_parse_errors' counter."""
    beam.DoFn.__init__(self)

    namespace = self.__class__
    self.num_parse_errors = Metrics.counter(namespace, "num_parse_errors")
def __init__(self):
    """Create the four counters used by this instance.

    Every counter is namespaced by the instance's class and named after
    the attribute that holds it.
    """
    namespace = self.__class__
    self.processed_revision_pairs = Metrics.counter(
        namespace, 'processed_revision_pairs')
    self.errors = Metrics.counter(namespace, 'errors')
    self.revision_skipped = Metrics.counter(namespace, 'revision_skipped')
    self.sentence_revises = Metrics.counter(namespace, 'sentence_revises')
def test_get_namespace_class(self):
    """A class namespace resolves to '<module>.<class name>'."""

    class MyClass(object):
        pass

    expected = MyClass.__module__ + '.' + MyClass.__name__
    self.assertEqual(expected, Metrics.get_namespace(MyClass))
def start_bundle(self):
    """Increment the 'bundles' counter namespaced by this class."""
    Metrics.counter(self.__class__, 'bundles').inc()
def process(self, element):
    """Count ``element``, record it in a distribution and pass it through.

    Increments the 'elements' counter and updates the 'element_dist'
    distribution, both namespaced by this instance's class.

    Returns:
      A one-item list containing ``element``.
    """
    namespace = self.__class__
    Metrics.counter(namespace, 'elements').inc()
    Metrics.distribution(namespace, 'element_dist').update(element)
    return [element]
def test_counter_empty_namespace(self):
    """Requesting a counter with an empty namespace raises ValueError."""
    self.assertRaises(ValueError, Metrics.counter, "", "names")
def __init__(self):
    """Create the 'processed_revisions' counter namespaced by this class."""
    namespace = self.__class__
    self.processed_revisions = Metrics.counter(
        namespace, 'processed_revisions')
def test_get_namespace_error(self):
    """A plain ``object()`` instance is rejected as a namespace."""
    self.assertRaises(ValueError, Metrics.get_namespace, object())
def process(self, element):
    """Update the element metrics and yield ``element`` unchanged.

    Adds 2 to ``self.static_counter_elements``, adds 1 to
    ``self.user_counter_elements`` and records ``element`` in the
    'element_dist' distribution namespaced by this instance's class.

    Yields:
      ``element``.
    """
    self.static_counter_elements.inc(2)
    self.user_counter_elements.inc()
    Metrics.distribution(self.__class__, 'element_dist').update(element)
    yield element