def deduplicate(self, select_best=False):
    """Builds a new Population in which each structure appears only once.

    The actual de-duplication is delegated to `utils.deduplicate_samples`.

    Args:
      select_best: If True, keep the highest-reward sample among samples that
        share a structure. If False, keep the sample that occurs first.

    Returns:
      A Population holding the de-duplicated samples.
    """
    unique_samples = utils.deduplicate_samples(self, select_best=select_best)
    return Population(unique_samples)
def best_n(self, n=1, q=None, discard_duplicates=False, blacklist=None):
    """Returns the best n samples.

    Note that ties are broken deterministically.

    Args:
      n: Max number to return. Ignored when `q` is provided.
      q: A float in (0, 1) corresponding to the minimum quantile for selecting
        samples. If provided, `n` is ignored and samples with a reward >= this
        quantile are selected.
      discard_duplicates: If True, when several samples have the same
        structure, return only one of them: the one with the highest reward.
      blacklist: Iterable of structures that should be excluded.

    Returns:
      Population containing the best n Samples, sorted in decreasing order of
      reward (output[0] is the best). Returns less than n if there are fewer
      than n Samples in the population. The result is empty if the blacklist
      excludes every sample.

    Raises:
      ValueError: If the population is empty.
    """
    if self.empty:
        raise ValueError('Population empty.')

    samples = self.samples
    if blacklist:
        samples = self._filter(samples, blacklist)

    # Make sure structures are unique. Keep the highest-reward sample of each
    # structure; keeping the first occurrence instead could throw away the
    # very sample this method is supposed to return.
    if discard_duplicates and len(samples) > 1:
        samples = utils.deduplicate_samples(samples, select_best=True)

    samples = sorted(samples, key=lambda sample: sample.reward, reverse=True)
    if q is None:
        return Population(samples[:n])

    # The blacklist may have removed everything; a quantile of an empty
    # sequence is undefined, so return an empty Population just like the
    # `n` branch would.
    if not samples:
        return Population(samples)

    q_value = np.quantile([sample.reward for sample in samples], q)
    return Population(
        [sample for sample in samples if sample.reward >= q_value])