Example #1
    def flush(self):
        """Flush pool contents."""
        # Write data to in-memory buffer first.
        buf = cStringIO.StringIO()
        with records.RecordsWriter(buf) as w:
            for record in self._buffer:
                w.write(record)
            # Pad to a block boundary so appended flushes stay block-aligned.
            w._pad_block()
        str_buf = buf.getvalue()
        buf.close()

        if not self._exclusive and len(str_buf) > _FILES_API_MAX_SIZE:
            # Shouldn't really happen because of flush size.
            raise errors.Error(
                "Buffer too big. Can't write more than %s bytes in one request: "
                "risk of writes interleaving. Got: %s" %
                (_FILES_API_MAX_SIZE, len(str_buf)))

        # Write data to file.
        start_time = time.time()
        with files.open(self._filename, "a",
                        exclusive_lock=self._exclusive) as f:
            f.write(str_buf)
            if self._ctx:
                operation.counters.Increment(COUNTER_IO_WRITE_BYTES,
                                             len(str_buf))(self._ctx)
        if self._ctx:
            operation.counters.Increment(
                COUNTER_IO_WRITE_MSEC, int(
                    (time.time() - start_time) * 1000))(self._ctx)

        # Reset the buffer and collect garbage to release the flushed data.
        self._buffer = []
        self._size = 0
        gc.collect()
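Stripped of the Files API specifics, Example #1 is a generic buffer-and-flush pattern: accumulate records in memory, serialize them in one pass, enforce a per-request size cap, then append the result with a single call. A minimal standalone sketch of that pattern; RecordPool, write_blob, and the constants are illustrative stand-ins, not part of the mapreduce library:

import io

_MAX_REQUEST_SIZE = 128 * 1024  # stand-in for _FILES_API_MAX_SIZE

class RecordPool(object):
    """Buffers records in memory and appends them in one request."""

    def __init__(self, write_blob, flush_size=64 * 1024,
                 max_request_size=_MAX_REQUEST_SIZE):
        # write_blob(data) performs a single append request (hypothetical).
        self._write_blob = write_blob
        self._flush_size = flush_size
        self._max_request_size = max_request_size
        self._buffer = []
        self._size = 0

    def flush(self):
        """Serialize the whole buffer, then write it with one request."""
        buf = io.BytesIO()
        for record in self._buffer:
            buf.write(record)
        data = buf.getvalue()
        buf.close()
        if len(data) > self._max_request_size:
            # Same guard as the library: one oversized request risks
            # interleaving with concurrent writers.
            raise ValueError("Buffer too big: %d bytes" % len(data))
        self._write_blob(data)
        self._buffer = []
        self._size = 0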
Example #2
    def append(self, data):
        """Append data to a file."""
        data_length = len(data)
        if self._size + data_length > self._flush_size:
            self.flush()

        if not self._exclusive and data_length > _FILES_API_MAX_SIZE:
            # Only non-exclusive writers must reject oversized chunks: without
            # an exclusive lock, splitting them across requests risks
            # interleaving with other writers.
            raise errors.Error("Input too big: %s bytes (max %s)." %
                               (data_length, _FILES_API_MAX_SIZE))
        else:
            self._buffer.append(data)
            self._size += data_length

        if self._size > self._flush_size:
            self.flush()
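Example #2 is the producer side of the same pool, and it flushes twice: once before buffering, so pending data plus the new chunk stays under the threshold, and once after, in case the new chunk alone crossed it. Continuing the hypothetical RecordPool sketch after Example #1, with a small usage run:

    def append(self, data):
        # Flush pending data first if this chunk would push us past the
        # threshold, so the chunk starts against an empty buffer.
        if self._size + len(data) > self._flush_size:
            self.flush()
        if len(data) > self._max_request_size:
            raise ValueError("Chunk of %d bytes exceeds one request" % len(data))
        self._buffer.append(data)
        self._size += len(data)
        # Flush again if the chunk alone crossed the threshold.
        if self._size > self._flush_size:
            self.flush()

# Usage: collect the append requests in a plain list.
chunks = []
pool = RecordPool(write_blob=chunks.append, flush_size=10)
pool.append(b"hello ")
pool.append(b"world")   # 6 + 5 > 10, so b"hello " is flushed first
pool.flush()
assert b"".join(chunks) == b"hello world"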
Example #3
    def flush(self):
        """Flush pool contents."""
        start_time = time.time()
        for filename, data in self._append_buffer.iteritems():
            with files.open(filename, "a") as f:
                if len(data) > _FILES_API_MAX_SIZE:
                    raise errors.Error("Bad data of length: %s" % len(data))
                if self._ctx:
                    operation.counters.Increment(COUNTER_IO_WRITE_BYTES,
                                                 len(data))(self._ctx)
                f.write(data)
        if self._ctx:
            operation.counters.Increment(
                COUNTER_IO_WRITE_MSEC, int(
                    (time.time() - start_time) * 1000))(self._ctx)
        self._append_buffer = {}
        self._size = 0
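Example #3 generalizes the pool to many destination files by keying the buffer on filename; one flush drains every file with a single append request each. A hypothetical standalone variant in the same spirit as the RecordPool sketch:

class FilePool(object):
    """Buffers data per destination file."""

    def __init__(self, open_file, flush_size=64 * 1024,
                 max_request_size=128 * 1024):
        # open_file(filename) returns a writable file object (hypothetical).
        self._open_file = open_file
        self._flush_size = flush_size
        self._max_request_size = max_request_size
        self._append_buffer = {}
        self._size = 0

    def flush(self):
        """Write each file's pending data with one request per file."""
        for filename, data in self._append_buffer.items():
            if len(data) > self._max_request_size:
                raise ValueError("Bad data of length: %d" % len(data))
            f = self._open_file(filename)
            try:
                f.write(data)
            finally:
                f.close()
        self._append_buffer = {}
        self._size = 0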
Example #4
    def _get_output_sharding(cls, mapreduce_state=None, mapper_spec=None):
        """Get output sharding parameter value from mapreduce state or mapper spec.

        At least one of the parameters should not be None.

        Args:
          mapreduce_state: mapreduce state as model.MapreduceState.
          mapper_spec: mapper specification as model.MapperSpec.

        Returns:
          The output sharding parameter value, lowercased.

        Raises:
          errors.Error: if neither mapreduce_state nor mapper_spec is given.
        """
        if mapper_spec:
            return _get_params(mapper_spec).get(
                FileOutputWriterBase.OUTPUT_SHARDING_PARAM,
                FileOutputWriterBase.OUTPUT_SHARDING_NONE).lower()
        if mapreduce_state:
            mapper_spec = mapreduce_state.mapreduce_spec.mapper
            return cls._get_output_sharding(mapper_spec=mapper_spec)
        raise errors.Error(
            "Neither mapreduce_state nor mapper_spec specified.")
Example #5
    def append(self, filename, data):
        """Append data to a file.

    Args:
      filename: the name of the file as string.
      data: data as string.
    """
        if self._size + len(data) > self._flush_size:
            self.flush()

        if len(data) > _FILES_API_MAX_SIZE:
            raise errors.Error(
                "Can't write more than %s bytes in one request: "
                "risk of writes interleaving." % _FILES_API_MAX_SIZE)
        else:
            self.__append(filename, data)

        if self._size > self._flush_size:
            self.flush()
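Example #5 combines the per-file buffer of Example #3 with the pre- and post-flush discipline of Example #2. An append method for the hypothetical FilePool sketch after Example #3:

    def append(self, filename, data):
        # Flush before buffering if the pool would exceed the threshold.
        if self._size + len(data) > self._flush_size:
            self.flush()
        if len(data) > self._max_request_size:
            raise ValueError(
                "Can't write more than %d bytes in one request" %
                self._max_request_size)
        # Concatenate per file so each file needs only one request at flush.
        self._append_buffer[filename] = (
            self._append_buffer.get(filename, b"") + data)
        self._size += len(data)
        # Flush again if this chunk alone crossed the threshold.
        if self._size > self._flush_size:
            self.flush()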