def add_sample_to_set(self, sample_id: Sample, sample_set: SplitSampleSet):
    """
    Commit a sample to one of the split sample sets.

    Bookkeeping happens in this order:
      1. the sample is mapped to the split type of `sample_set`
         in `self.sample_splits`,
      2. the sample is removed from `self.unused_sample_ids`,
      3. `sample_set` is updated in-place with the sample, using
         `self.full_sample_set` as the source of sample data.

    Args:
        sample_id: id of the sample to assign.
        sample_set (SplitSampleSet): the set that receives the sample.
    """
    # Record the assignment before touching the pool of unused ids, exactly
    # as the removal below expects the id to still be present there.
    self.sample_splits[sample_id] = sample_set.split_type
    self.unused_sample_ids.remove(sample_id)

    # In-place update: the set's own state changes to include the sample.
    sample_set.update(sample_id, self.full_sample_set, inplace=True)
def peek_sample(self, sample_id: str, sample_set_to_update: SplitSampleSet,
                other_sample_set: SplitSampleSet) -> float:
    """
    Score the split that would result from adding `sample_id` to
    `sample_set_to_update`, without modifying that set (the update is
    requested with `inplace=False`).

    Args:
        sample_id (str): id of the sample to evaluate.
        sample_set_to_update (SplitSampleSet): set the sample would be added to.
        other_sample_set (SplitSampleSet): the opposite set, left as it is.

    Returns:
        float: value of `self.score` for the hypothetical split.
    """
    # Distributions the updated set would have if the sample were added.
    new_atoms, new_compounds = sample_set_to_update.update(
        sample_id, self.full_sample_set, inplace=False
    )
    candidate_is_train = sample_set_to_update.is_train

    # The other set contributes its current distributions unchanged.
    other_atoms = other_sample_set.atom_distribution
    other_compounds = other_sample_set.compound_distribution

    # `self.score` takes (train atoms, test atoms, train compounds, test compounds).
    if candidate_is_train:
        return self.score(new_atoms, other_atoms, new_compounds, other_compounds)
    return self.score(other_atoms, new_atoms, other_compounds, new_compounds)
def peek_ray(sample_id: str, sample_set_to_update: SplitSampleSet,
             other_sample_set: SplitSampleSet, dbca_config: DBCASplitterConfig,
             full_sample_set: FullSampleSet) -> float:
    """
    Score the split obtained by hypothetically adding `sample_id` to
    `sample_set_to_update`; the set itself is not modified (`inplace=False`).

    Free-function form of the peek computation: the full sample set and the
    config are passed in explicitly instead of being read from an object.
    NOTE(review): the name suggests this is meant to be dispatched through
    Ray workers - confirm against the caller.

    Args:
        sample_id (str): id of the sample to evaluate.
        sample_set_to_update (SplitSampleSet): set the sample would be added to.
        other_sample_set (SplitSampleSet): the opposite set, left as it is.
        dbca_config (DBCASplitterConfig): configuration forwarded to `score`.
        full_sample_set (FullSampleSet): sample data used for the update.

    Returns:
        float: value of `score` for the hypothetical split.
    """
    updated_atoms, updated_compounds = sample_set_to_update.update(
        sample_id, full_sample_set, inplace=False
    )
    updated_is_train = sample_set_to_update.is_train
    fixed_atoms = other_sample_set.atom_distribution
    fixed_compounds = other_sample_set.compound_distribution

    # Order each pair as (train, test) depending on which side was updated.
    if updated_is_train:
        atom_pair = (updated_atoms, fixed_atoms)
        compound_pair = (updated_compounds, fixed_compounds)
    else:
        atom_pair = (fixed_atoms, updated_atoms)
        compound_pair = (fixed_compounds, updated_compounds)

    return score(*atom_pair, *compound_pair, dbca_config)
def peek_ray(sample_id: str, sample_set_to_update: SplitSampleSet,
             other_sample_set: SplitSampleSet, dbca_config: DBCASplitterConfig,
             full_sample_set: FullSampleSet) -> float:
    """
    Compute the split score for adding `sample_id` to `sample_set_to_update`
    without committing the change (the update is requested with
    `inplace=False`).

    All inputs are explicit arguments; nothing is read from an enclosing
    object. NOTE(review): presumably intended for execution in separate
    workers (Ray / multiprocessing) - confirm against the caller.

    Args:
        sample_id (str): id of the sample to evaluate.
        sample_set_to_update (SplitSampleSet): set the sample would be added to.
        other_sample_set (SplitSampleSet): the opposite set, left as it is.
        dbca_config (DBCASplitterConfig): configuration forwarded to `score`.
        full_sample_set (FullSampleSet): sample data used for the update.

    Returns:
        float: value of `score` for the hypothetical split.
    """
    peek_atoms, peek_compounds = sample_set_to_update.update(
        sample_id, full_sample_set, inplace=False
    )

    # `score` takes (train atoms, test atoms, train compounds, test compounds,
    # config); the peeked distributions go on whichever side was updated.
    if sample_set_to_update.is_train:
        held_atoms = other_sample_set.atom_distribution
        held_compounds = other_sample_set.compound_distribution
        return score(peek_atoms, held_atoms, peek_compounds, held_compounds, dbca_config)

    held_atoms = other_sample_set.atom_distribution
    held_compounds = other_sample_set.compound_distribution
    return score(held_atoms, peek_atoms, held_compounds, peek_compounds, dbca_config)