Example #1
0
  def __iter__(self):
    """Iterate over grouped (key, values) pairs from serialized KeyValues.

    Reads binary records from the parent reader, parses each as a
    file_service_pb.KeyValues proto, and accumulates values per key --
    optionally through a user-supplied combiner -- until a non-partial
    proto completes the key.

    Yields:
      (key, values) tuples when a key's value list is complete, or
      input_readers.ALLOW_CHECKPOINT after a partial proto so the
      framework may checkpoint mid-key.
    """
    ctx = context.get()
    combiner = None

    if ctx:
      # The combiner is configured as a dotted name in mapper params and
      # resolved to a callable here.
      combiner_spec = ctx.mapreduce_spec.mapper.params.get("combiner_spec")
      if combiner_spec:
        combiner = util.handler_for_name(combiner_spec)

    # Accumulation state for the key currently being assembled.
    self.current_key = None
    self.current_values = None

    for binary_record in super(_ReducerReader, self).__iter__():
      proto = file_service_pb.KeyValues()
      proto.ParseFromString(binary_record)

      if self.current_key is None:
        self.current_key = proto.key()
        self.current_values = []
      else:
        # Partial protos for a single key must arrive consecutively.
        assert proto.key() == self.current_key, (
            "inconsistent key sequence. Expected %s but got %s" %
            (self.current_key, proto.key()))

      if combiner:
        combiner_result = combiner(
            self.current_key, proto.value_list(), self.current_values)

        # Combiners are required to be generators; a plain return value is
        # rejected explicitly.
        if not util.is_generator(combiner_result):
          raise errors.BadCombinerOutputError(
              "Combiner %s should yield values instead of returning them (%s)" %
              (combiner, combiner_result))

        self.current_values = []
        for value in combiner_result:
          if isinstance(value, operation.Operation):
            # Operations are applied to the context rather than collected
            # as values.
            value(ctx)
          else:
            # with combiner current values always come from combiner
            self.current_values.append(value)
      else:
        # without combiner we just accumulate values.
        self.current_values.extend(proto.value_list())

      if not proto.partial():
        key = self.current_key
        values = self.current_values
        # This is final value, don't try to serialize it.
        self.current_key = None
        self.current_values = None
        yield (key, values)
      else:
        # Key is still incomplete; give the framework a checkpoint chance.
        yield input_readers.ALLOW_CHECKPOINT
Example #2
0
def mr_wrap(func):
    """Wrap *func* so it is invoked with a freshly created fblookup.

    The returned map_func preserves func's generator-ness: a generator
    handler yields through map_func, a plain handler returns through it.
    """
    if not util.is_generator(func):
        def map_func(*args, **kwargs):
            return func(get_fblookup(), *args, **kwargs)
        return map_func

    def map_func(*args, **kwargs):
        fbl = get_fblookup()
        # Re-yielding keeps map_func itself detectable as a generator
        # function; returning the generator directly would work but would
        # make map_func look like a plain function.
        for item in func(fbl, *args, **kwargs):
            yield item
    return map_func
def mr_wrap(func):
    """Produce a map_func that calls *func* with a new fblookup first arg."""
    if util.is_generator(func):

        def map_func(*args, **kwargs):
            lookup = get_fblookup()
            # Delegating with a for/yield loop keeps map_func itself a
            # generator function (so generator detection still works),
            # which returning func(...) directly would not.
            produced = func(lookup, *args, **kwargs)
            for value in produced:
                yield value
    else:

        def map_func(*args, **kwargs):
            lookup = get_fblookup()
            return func(lookup, *args, **kwargs)

    return map_func
Example #4
0
    def process_data(self, data, input_reader, ctx, transient_shard_state):
        """Process a single data piece.

        Call mapper handler on the data.

        Args:
          data: a datum to process.
          input_reader: input reader.
          ctx: current execution context.

        Returns:
          True if scan should be continued, False if scan should be aborted.
        """
        if data is not input_readers.ALLOW_CHECKPOINT:
            ctx.counters.increment(context.COUNTER_MAPPER_CALLS)

            handler = ctx.mapreduce_spec.mapper.handler
            result = (handler(*data) if input_reader.expand_parameters
                      else handler(data))

            # Generator handlers emit outputs; plain handlers produce only
            # side effects, so their result is ignored.
            if util.is_generator(handler):
                for emitted in result:
                    if isinstance(emitted, operation.Operation):
                        # Operations are applied to the context directly.
                        emitted(ctx)
                        continue
                    writer = transient_shard_state.output_writer
                    if writer:
                        writer.write(emitted, ctx)
                    else:
                        logging.error(
                            "Handler yielded %s, but no output writer is set.",
                            emitted)

        # Abort the scan once this slice has run past its time budget.
        if self._time() - self._start_time > _SLICE_DURATION_SEC:
            logging.debug("Spent %s seconds. Rescheduling",
                          self._time() - self._start_time)
            return False
        return True
Example #5
0
  def process_data(self, data, input_reader, ctx, transient_shard_state):
    """Process a single data piece.

    Call mapper handler on the data.

    Args:
      data: a datum to process.
      input_reader: input reader.
      ctx: current execution context.

    Returns:
      True if scan should be continued, False if scan should be aborted.
    """
    if data is not input_readers.ALLOW_CHECKPOINT:
      ctx.counters.increment(context.COUNTER_MAPPER_CALLS)

      handler = ctx.mapreduce_spec.mapper.handler
      if input_reader.expand_parameters:
        result = handler(*data)
      else:
        result = handler(data)

      # Only generator handlers produce outputs to route; a plain handler
      # works purely through side effects.
      if util.is_generator(handler):
        for item in result:
          if isinstance(item, operation.Operation):
            item(ctx)
          else:
            writer = transient_shard_state.output_writer
            if writer:
              writer.write(item, ctx)
            else:
              logging.error(
                  "Handler yielded %s, but no output writer is set.", item)

    # Stop scanning when the slice time budget is exhausted so the shard
    # can be rescheduled.
    if self._time() - self._start_time > _SLICE_DURATION_SEC:
      logging.debug("Spent %s seconds. Rescheduling",
                    self._time() - self._start_time)
      return False
    return True
 def testNotGenerator(self):
   """A plain function must not be classified as a generator."""
   outcome = util.is_generator(test_handler_function)
   self.assertFalse(outcome)
 def testGenerator(self):
   """A yielding handler must be classified as a generator."""
   outcome = util.is_generator(test_handler_yield)
   self.assertTrue(outcome)