def test_subsample_array():
    """Check that a default subsample has the requested size and known items."""
    population = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    # With default arguments the helper is allowed to modify (shuffle)
    # the list it is handed, so only size and membership are checked.
    sample = utils.subsample_array(population, 5)

    assert len(sample) == 5
    assert set(sample) <= set(population)
def _subsample_trackers(
    self,
    incoming_trackers: List[TrackerWithCachedStates],
    max_number_of_trackers: int,
) -> List[TrackerWithCachedStates]:
    """Pick a random subset of the given trackers.

    Long flows with many forks can produce an unmanageable number of
    trackers, so the list is cut down to at most a fixed size.

    Args:
        incoming_trackers: Trackers to choose from.
        max_number_of_trackers: Upper bound on the number of trackers to
            keep; ``None`` disables subsampling.

    Returns:
        ``incoming_trackers`` itself when no limit is set, otherwise the
        result of ``utils.subsample_array`` drawn with ``self.config.rand``.
    """
    # No limit configured: hand the trackers back untouched.
    if max_number_of_trackers is None:
        return incoming_trackers

    return utils.subsample_array(
        incoming_trackers,
        max_number_of_trackers,
        rand=self.config.rand,
    )
def _create_start_trackers_for_augmentation(
    self, story_end_trackers: List[TrackerWithCachedStates]
) -> TrackerLookupDict:
    """Build the start trackers for the next augmentation phase.

    Rather than beginning each later phase with a fresh tracker, a sample
    of the trackers that already reached a story end (the `None` end
    checkpoint) is reused and all steps are processed again on top of
    them. The reused trackers need a small cleanup first.

    Args:
        story_end_trackers: Trackers that made it to the end of a story.

    Returns:
        A lookup of checkpoint name to trackers. It is empty when story
        concatenation is disabled; otherwise the augmented copies are
        stored under ``STORY_START``.
    """
    next_active_trackers = defaultdict(list)

    # Augmentation only happens when story concatenation is switched on.
    if not self.config.use_story_concatenation:
        return next_active_trackers

    sampled_trackers = utils.subsample_array(
        story_end_trackers,
        self.config.augmentation_factor,
        rand=self.config.rand,
    )

    for finished_tracker in sampled_trackers:
        # Work on a copy; otherwise the original tracker would be updated.
        augmented = finished_tracker.copy()
        augmented.is_augmented = True
        # Every story both ends and starts with action listen, so after
        # logging the first actions of the next phase the tracker would
        # hold two action listens in a row. Undo the trailing one here.
        augmented.update(ActionReverted())
        next_active_trackers[STORY_START].append(augmented)

    return next_active_trackers
def test_subsample_array_read_only():
    """Check size and membership when the input list must not be modified."""
    population = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    # Ask the helper not to touch the list that is passed in.
    sample = utils.subsample_array(
        population, 5, can_modify_incoming_array=False
    )

    assert len(sample) == 5
    assert set(sample) <= set(population)