def process_entity(self, entity, ctx, transient_shard_state):
    """Run the mapper handler on one entity and report whether to keep scanning.

    The mapper-calls counter is incremented, then the handler configured in
    ctx.mapreduce_spec.mapper is invoked with the entity. When the handler
    is a generator function, each yielded value is dispatched: Operation
    instances are called with ctx, anything else is handed to the shard's
    output writer (or logged as an error when no writer is set).

    Args:
      entity: an entity to process.
      ctx: current execution context.
      transient_shard_state: object whose output_writer attribute receives
        the non-operation values yielded by the handler.

    Returns:
      True if scan should be continued, False if scan should be aborted
      because more than _SLICE_DURATION_SEC has passed since
      self._start_time.
    """
    ctx.counters.increment(context.COUNTER_MAPPER_CALLS)
    handler = ctx.mapreduce_spec.mapper.handler

    if not util.is_generator_function(handler):
        # Plain function: call it only for its side effects.
        handler(entity)
    else:
        for yielded in handler(entity):
            if isinstance(yielded, operation.Operation):
                yielded(ctx)
                continue
            # The writer is looked up per yielded value, as in the original.
            writer = transient_shard_state.output_writer
            if writer:
                writer.write(yielded, ctx)
            else:
                logging.error(
                    "Handler yielded %s, but no output writer is set.",
                    yielded)

    out_of_time = self._time() - self._start_time > _SLICE_DURATION_SEC
    if not out_of_time:
        return True

    # The clock is read again so the logged value is taken at log time.
    logging.debug("Spent %s seconds. Rescheduling",
                  self._time() - self._start_time)
    return False
def process_data(self, data, input_reader, ctx, transient_shard_state):
    """Run the mapper handler on one datum and report whether to keep scanning.

    When data is the input_readers.ALLOW_CHECKPOINT sentinel, the handler is
    not called and the mapper-calls counter is left untouched; only the time
    check is performed. Otherwise the handler is called with the datum
    (unpacked into positional arguments when input_reader.expand_parameters
    is truthy). For generator-function handlers each yielded value is
    dispatched: Operation instances are called with ctx, anything else goes
    to the shard's output writer (or is logged as an error when no writer
    is set).

    Args:
      data: a datum to process.
      input_reader: input reader; its expand_parameters attribute selects
        how the datum is passed to the handler.
      ctx: current execution context.
      transient_shard_state: object whose output_writer attribute receives
        the non-operation values yielded by the handler.

    Returns:
      True if scan should be continued, False if scan should be aborted
      because more than _SLICE_DURATION_SEC has passed since
      self._start_time.
    """
    if data is not input_readers.ALLOW_CHECKPOINT:
        ctx.counters.increment(context.COUNTER_MAPPER_CALLS)
        handler = ctx.mapreduce_spec.mapper.handler

        result = (handler(*data) if input_reader.expand_parameters
                  else handler(data))

        if util.is_generator_function(handler):
            for yielded in result:
                if isinstance(yielded, operation.Operation):
                    yielded(ctx)
                    continue
                # The writer is looked up per yielded value, as in the
                # original.
                writer = transient_shard_state.output_writer
                if writer:
                    writer.write(yielded, ctx)
                else:
                    logging.error(
                        "Handler yielded %s, but no output writer is set.",
                        yielded)

    out_of_time = self._time() - self._start_time > _SLICE_DURATION_SEC
    if not out_of_time:
        return True

    # The clock is read again so the logged value is taken at log time.
    logging.debug("Spent %s seconds. Rescheduling",
                  self._time() - self._start_time)
    return False
def process_entity(self, entity, ctx):
    """Run the mapper handler on one entity and report whether to keep scanning.

    The mapper-calls counter is incremented, then the handler configured in
    ctx.mapreduce_spec.mapper is invoked with the entity. When the handler
    is a generator function, every callable it yields is invoked with ctx.
    Non-callable values are not consumed: they only produce an error log
    entry (sized values of length 2 are reported as unimplemented
    collectors, other sized values as bad tuples, unsized values as a wrong
    type).

    Args:
      entity: an entity to process.
      ctx: current execution context.

    Returns:
      True if scan should be continued, False if scan should be aborted
      because more than _SLICE_DURATION_SEC has passed since
      self._start_time.
    """
    ctx.counters.increment(context.COUNTER_MAPPER_CALLS)
    handler = ctx.mapreduce_spec.mapper.handler

    if not util.is_generator_function(handler):
        # Plain function: call it only for its side effects.
        handler(entity)
    else:
        for yielded in handler(entity):
            if callable(yielded):
                yielded(ctx)
                continue
            try:
                if len(yielded) != 2:
                    logging.error("Got bad output tuple of length %d",
                                  len(yielded))
                else:
                    logging.error("Collectors not implemented yet")
            except TypeError:
                # len() is unsupported for this value.
                logging.error(
                    "Handler yielded type %s, expected a callable or a tuple",
                    yielded.__class__.__name__)

    out_of_time = self._time() - self._start_time > _SLICE_DURATION_SEC
    if not out_of_time:
        return True

    # The clock is read again so the logged value is taken at log time.
    logging.debug("Spent %s seconds. Rescheduling",
                  self._time() - self._start_time)
    return False
def process_entity(self, entity, quota_consumer, ctx):
    """Run the mapper handler on one entity and report whether to keep scanning.

    The mapper-calls counter is incremented, then the handler configured in
    ctx.mapreduce_spec.mapper is invoked with the entity. When the handler
    is a generator function, every callable it yields is invoked with ctx.
    Non-callable values are not consumed: they only produce an error log
    entry (sized values of length 2 are reported as unimplemented
    collectors, other sized values as bad tuples, unsized values as a wrong
    type).

    Args:
      entity: an entity to process.
      quota_consumer: an instance of quota.QuotaConsumer for current run.
        It is not read by this method's body.
      ctx: current execution context.

    Returns:
      True if scan should be continued, False if scan should be aborted
      because more than _SLICE_DURATION_SEC has passed since
      self._start_time.
    """
    ctx.counters.increment(context.COUNTER_MAPPER_CALLS)
    handler = ctx.mapreduce_spec.mapper.handler

    if not util.is_generator_function(handler):
        # Plain function: call it only for its side effects.
        handler(entity)
    else:
        for yielded in handler(entity):
            if callable(yielded):
                yielded(ctx)
                continue
            try:
                if len(yielded) != 2:
                    logging.error("Got bad output tuple of length %d",
                                  len(yielded))
                else:
                    logging.error("Collectors not implemented yet")
            except TypeError:
                # len() is unsupported for this value.
                logging.error(
                    "Handler yielded type %s, expected a callable or a tuple",
                    yielded.__class__.__name__)

    out_of_time = self._time() - self._start_time > _SLICE_DURATION_SEC
    if not out_of_time:
        return True

    # The clock is read again so the logged value is taken at log time.
    logging.debug("Spent %s seconds. Rescheduling",
                  self._time() - self._start_time)
    return False
def testNotGenerator(self):
    """util.is_generator_function is falsy for test_handler_function."""
    detected = util.is_generator_function(test_handler_function)
    self.assertFalse(detected)
def testGenerator(self):
    """util.is_generator_function is truthy for test_handler_yield."""
    detected = util.is_generator_function(test_handler_yield)
    self.assertTrue(detected)