def test_try_claim(self):
    """Check try_claim on an unsplittable tracker wrapping offsets 100-200.

    Claims at increasing positions between the start and stop offsets are
    expected to succeed; a claim past the stop offset is expected to fail.
    """
    offset_tracker = range_trackers.OffsetRangeTracker(100, 200)
    tracker = range_trackers.UnsplittableRangeTracker(offset_tracker)

    # Positions between the start (100) and stop (200) offsets.
    for position in (110, 140, 183):
        self.assertTrue(tracker.try_claim(position))

    # 210 lies beyond the stop offset of the wrapped tracker.
    self.assertFalse(tracker.try_claim(210))
def get_range_tracker(self, start_position, stop_position):
    """Return an offset range tracker for the requested range.

    Args:
        start_position: first offset of the range; ``None`` means 0.
        stop_position: stop offset of the range; ``None`` means
            ``self._num_records``.

    Returns:
        An ``OffsetRangeTracker`` over the resolved range, wrapped in an
        ``UnsplittableRangeTracker`` when ``self._dynamic_splitting`` is
        the string ``'none'``.
    """
    begin = 0 if start_position is None else start_position
    end = self._num_records if stop_position is None else stop_position
    offset_tracker = range_trackers.OffsetRangeTracker(begin, end)

    # Dynamic splitting disabled: hand back a tracker that refuses splits.
    if self._dynamic_splitting == 'none':
        return range_trackers.UnsplittableRangeTracker(offset_tracker)
    return offset_tracker
def test_try_split_fails(self):
    """Check that try_split is rejected at every position once 110 is claimed."""
    offset_tracker = range_trackers.OffsetRangeTracker(100, 200)
    tracker = range_trackers.UnsplittableRangeTracker(offset_tracker)
    self.assertTrue(tracker.try_claim(110))

    # Out of range: before the claimed position and past the stop offset.
    for position in (109, 210):
        self.assertFalse(tracker.try_split(position))

    # Within range, but splitting is still unsuccessful. Each attempt runs
    # against a fresh shallow copy of the tracker.
    for position in (111, 130, 199):
        self.assertFalse(copy.copy(tracker).try_split(position))
def get_range_tracker(self, start_position, stop_position):
    """Implements :class:`~apache_beam.io.iobase.BoundedSource.get_range_tracker`

    Args:
        start_position: first offset of the range; ``None`` means 0.
        stop_position: stop offset of the range; ``None`` means
            ``OffsetRangeTracker.OFFSET_INFINITY``.

    Returns:
        An ``UnsplittableRangeTracker`` wrapping an ``OffsetRangeTracker``
        over the resolved range.
    """
    begin = 0 if start_position is None else start_position
    if stop_position is None:
        end = range_trackers.OffsetRangeTracker.OFFSET_INFINITY
    else:
        end = stop_position

    # The tracker is always unsplittable, which means a collection can only
    # be read sequentially for now.
    return range_trackers.UnsplittableRangeTracker(
        range_trackers.OffsetRangeTracker(begin, end))
def get_range_tracker(self, start_position=0, stop_position=None):
    """
    Implement the method
    `apache_beam.io.iobase.BoundedSource.get_range_tracker`.

    `BillboardSource` uses an unsplittable range tracker, which means that
    a collection can only be read sequentially. However, the range tracker
    must still be defined.

    Args:
        start_position: first offset of the range; ``None`` means 0.
        stop_position: stop offset of the range; ``None`` means
            ``OffsetRangeTracker.OFFSET_INFINITY``.

    Returns:
        An ``UnsplittableRangeTracker`` wrapping an ``OffsetRangeTracker``
        over the resolved range.
    """
    self.logger.debug('Creating the range tracker.')

    # Honour the caller-supplied range instead of discarding it; fall back
    # to the full range only for arguments left as None.
    if start_position is None:
        start_position = 0
    if stop_position is None:
        stop_position = range_trackers.OffsetRangeTracker.OFFSET_INFINITY

    range_tracker = range_trackers.OffsetRangeTracker(start_position,
                                                      stop_position)
    return range_trackers.UnsplittableRangeTracker(range_tracker)
def get_range_tracker(self, start_position=0, stop_position=None):
    """
    Implements `apache_beam.io.iobase.BoundedSource.get_range_tracker`.

    This source uses an unsplittable range tracker. This means that a
    collection can only be read sequentially. However, the range tracker
    must still be defined.

    Args:
        start_position: first offset of the range; ``None`` means 0.
        stop_position: stop offset of the range; ``None`` means
            ``OffsetRangeTracker.OFFSET_INFINITY``.

    Returns:
        An ``UnsplittableRangeTracker`` wrapping an ``OffsetRangeTracker``
        over the resolved range.
    """
    # Honour the caller-supplied range instead of discarding it; fall back
    # to the full range only for arguments left as None.
    if start_position is None:
        start_position = 0
    if stop_position is None:
        stop_position = range_trackers.OffsetRangeTracker.OFFSET_INFINITY

    range_tracker = range_trackers.OffsetRangeTracker(start_position,
                                                      stop_position)
    return range_trackers.UnsplittableRangeTracker(range_tracker)
def get_range_tracker(self, start_position, stop_position):
    """Return the range tracker for the requested offset range.

    Args:
        start_position: first offset of the range; ``None`` means
            ``self._start_offset``.
        stop_position: stop offset of the range; ``None`` means
            ``self._stop_offset`` when the source is splittable, otherwise
            ``OffsetRangeTracker.OFFSET_INFINITY``.

    Returns:
        An ``OffsetRangeTracker`` over the resolved range, wrapped in an
        ``UnsplittableRangeTracker`` when ``self._splittable`` is falsy.
    """
    begin = self._start_offset if start_position is None else start_position

    if stop_position is not None:
        end = stop_position
    elif self._splittable:
        end = self._stop_offset
    else:
        # For an unsplittable file, OFFSET_INFINITY is the default end
        # offset so that all data of the source gets read. Using the file
        # size as the end offset would be wrong for certain unsplittable
        # sources, e.g. compressed sources.
        end = range_trackers.OffsetRangeTracker.OFFSET_INFINITY

    offset_tracker = range_trackers.OffsetRangeTracker(begin, end)
    if self._splittable:
        return offset_tracker
    return range_trackers.UnsplittableRangeTracker(offset_tracker)