def _recover(self, mr_spec, shard_number, shard_attempt):
    """Create a writer for the next seg after a failed shard attempt.

    If the current seg holds any valid data, the seg file is closed
    (force-closed at the offset GCS has already accepted, when that is
    ahead of the local buffer position) and its valid length is stamped
    into the object's metadata so readers can ignore trailing garbage.
    A fresh writer is then returned, pointed at the next seg index.

    Args:
      mr_spec: the MapreduceSpec for this job.
      shard_number: int, shard number.
      shard_attempt: int, attempt number for this shard.

    Returns:
      A new writer instance whose _seg_index is set to the next seg.
    """
    next_seg_index = self._seg_index

    # Only finalize the current seg if it contains valid data; otherwise
    # the new writer reuses the same seg index.
    if self._seg_valid_length != 0:
      try:
        # GCS reports the last persisted offset; the next write offset
        # is one past it.
        gcs_next_offset = self._streaming_buffer._get_offset_from_gcs() + 1
        if gcs_next_offset > self._streaming_buffer.tell():
          # GCS is ahead of the local buffer: force-close at the offset
          # GCS already accepted instead of a normal flush-and-close.
          self._streaming_buffer._force_close(gcs_next_offset)
        else:
          self._streaming_buffer.close()
      except cloudstorage.FileClosedError:
        # A previous attempt already closed the file; nothing to do.
        pass
      # In-place copy stamps the valid length into the file's metadata so
      # readers can truncate anything written past that point.
      cloudstorage_api._copy2(
          self._streaming_buffer.name,
          self._streaming_buffer.name,
          metadata={self._VALID_LENGTH: self._seg_valid_length})
      next_seg_index = self._seg_index + 1

    # Use the module-level _get_params helper, consistent with the other
    # writer methods in this module (was self.get_params, which does not
    # match the convention used elsewhere in this file).
    writer_spec = _get_params(mr_spec.mapper, allow_old=False)
    key = self._generate_filename(
        writer_spec, mr_spec.name, mr_spec.mapreduce_id,
        shard_number, shard_attempt, next_seg_index)
    new_writer = self._create(writer_spec, key)
    new_writer._seg_index = next_seg_index
    return new_writer
def _recover(self, mr_spec, shard_number, shard_attempt):
    """Create a writer for the next seg after a failed shard attempt.

    If the current seg holds any valid data, the seg file is closed
    (force-closed at the offset GCS has already accepted when that is
    ahead of the local buffer position) and its valid length is stamped
    into the object's metadata so readers can ignore trailing garbage.
    A fresh writer is then returned, pointed at the next seg index.

    Args:
      mr_spec: the MapreduceSpec for this job.
      shard_number: int, shard number.
      shard_attempt: int, attempt number for this shard.

    Returns:
      A new writer instance whose _seg_index is set to the next seg.
    """
    next_seg_index = self._seg_index
    # Only finalize the current seg if it contains valid data; otherwise
    # the new writer reuses the same seg index.
    if self._seg_valid_length != 0:
      try:
        # GCS reports the last persisted offset; the next write offset is
        # one past it.
        gcs_next_offset = self._streaming_buffer._get_offset_from_gcs() + 1
        if gcs_next_offset > self._streaming_buffer.tell():
          # GCS is ahead of the local buffer: force-close at the offset
          # GCS already accepted instead of a normal flush-and-close.
          self._streaming_buffer._force_close(gcs_next_offset)
        else:
          self._streaming_buffer.close()
      except cloudstorage.FileClosedError:
        # A previous attempt already closed the file; nothing to do.
        pass
      # In-place copy stamps the valid length into the file's metadata so
      # readers can truncate anything written past that point.
      cloudstorage_api._copy2(
          self._streaming_buffer.name,
          self._streaming_buffer.name,
          metadata={self._VALID_LENGTH: self._seg_valid_length})
      next_seg_index = self._seg_index + 1
    writer_spec = _get_params(mr_spec.mapper, allow_old=False)
    key = self._generate_filename(
        writer_spec, mr_spec.name, mr_spec.mapreduce_id,
        shard_number, shard_attempt, next_seg_index)
    new_writer = self._create(writer_spec, key)
    new_writer._seg_index = next_seg_index
    return new_writer
def finalize(self, ctx, shard_state):
    """Finalize this shard's output and record locator info on shard_state.

    Closes the streaming buffer, and stores in shard_state.writer_state
    the information readers need to locate this shard's output. In no-dup
    mode the valid length is also stamped into the file's GCS metadata and
    the seg prefix/last-index pair is recorded so every seg can be found.

    Args:
      ctx: the Context for this shard.
      shard_state: the ShardState on which writer_state is recorded.
    """
    self._streaming_buffer.close()

    if self._no_dup:
      # In-place copy stamps the final valid length into the object's
      # metadata so readers can drop any trailing duplicate data.
      cloudstorage_api._copy2(
          self._streaming_buffer.name,
          self._streaming_buffer.name,
          metadata={self._VALID_LENGTH: self._streaming_buffer.tell()})

      mr_spec = ctx.mapreduce_spec
      # Use the module-level _get_params helper, consistent with the other
      # writer methods in this module (was self.get_params, which does not
      # match the convention used elsewhere in this file).
      writer_spec = _get_params(mr_spec.mapper, allow_old=False)
      filename = self._generate_filename(writer_spec,
                                         mr_spec.name,
                                         mr_spec.mapreduce_id,
                                         shard_state.shard_number)
      # Seg files are named "<prefix>-<index>"; record the prefix and the
      # last index so readers can enumerate every seg of this shard.
      seg_filename = self._streaming_buffer.name
      prefix, last_index = seg_filename.rsplit("-", 1)
      shard_state.writer_state = {self._SEG_PREFIX: prefix + "-",
                                  self._LAST_SEG_INDEX: int(last_index),
                                  "filename": filename}
    else:
      shard_state.writer_state = {"filename": self._streaming_buffer.name}
def finalize(self, ctx, shard_state):
    """Finalize this shard's output and record locator info on shard_state.

    Closes the streaming buffer, and stores in shard_state.writer_state
    the information readers need to locate this shard's output. In no-dup
    mode the valid length is also stamped into the file's GCS metadata and
    the seg prefix/last-index pair is recorded so every seg can be found.

    Args:
      ctx: the Context for this shard.
      shard_state: the ShardState on which writer_state is recorded.
    """
    self._streaming_buffer.close()
    if self._no_dup:
      # In-place copy stamps the final valid length into the object's
      # metadata so readers can drop any trailing duplicate data.
      cloudstorage_api._copy2(
          self._streaming_buffer.name,
          self._streaming_buffer.name,
          metadata={self._VALID_LENGTH: self._streaming_buffer.tell()})
      mr_spec = ctx.mapreduce_spec
      writer_spec = _get_params(mr_spec.mapper, allow_old=False)
      filename = self._generate_filename(writer_spec,
                                         mr_spec.name,
                                         mr_spec.mapreduce_id,
                                         shard_state.shard_number)
      # Seg files are named "<prefix>-<index>"; record the prefix and the
      # last index so readers can enumerate every seg of this shard.
      seg_filename = self._streaming_buffer.name
      prefix, last_index = seg_filename.rsplit("-", 1)
      shard_state.writer_state = {self._SEG_PREFIX: prefix + "-",
                                  self._LAST_SEG_INDEX: int(last_index),
                                  "filename": filename}
    else:
      shard_state.writer_state = {"filename": self._streaming_buffer.name}