示例#1
0
    def add_sample_to_set(self, sample_id: Sample, sample_set: SplitSampleSet):
        """
        Commit `sample_id` to `sample_set`.

        Three pieces of state are touched, in this order:
          1. `self.sample_splits[sample_id]` is set to the split type of
             `sample_set`.
          2. `sample_id` is removed from `self.unused_sample_ids`.
          3. `sample_set` is updated in place with the sample, using
             `self.full_sample_set` as the source of sample data.

        Args:
            sample_id (Sample): The sample to assign.
            sample_set (SplitSampleSet): The sample set receiving the sample.

        Note:
            Any exception raised by `self.unused_sample_ids.remove` (e.g. when
            the id is not present) propagates after step 1 has already run.
        """
        # Record which split the sample now belongs to.
        self.sample_splits[sample_id] = sample_set.split_type
        # The sample is no longer available for future assignment.
        self.unused_sample_ids.remove(sample_id)

        # inplace=True: actually mutate the sample set (as opposed to peeking).
        sample_set.update(sample_id, self.full_sample_set, inplace=True)
示例#2
0
    def peek_sample(self, sample_id: str, sample_set_to_update: SplitSampleSet,
                    other_sample_set: SplitSampleSet) -> float:
        """
        Compute the split score that would result from adding `sample_id` to
        `sample_set_to_update`, without committing the update
        (`update` is called with `inplace=False`).

        Args:
            sample_id (str): id of the candidate sample.
            sample_set_to_update (SplitSampleSet): Sample set that would
                receive the sample. Its `is_train` flag decides which side of
                the score its hypothetical distributions are passed on.
            other_sample_set (SplitSampleSet): The sample set that is not
                changed; its current atom/compound distributions are used.

        Returns:
            float: Result of `self.score(train_atoms, test_atoms,
            train_compounds, test_compounds)` for the hypothetical split.
        """
        # Hypothetical (atom, compound) distributions after the addition.
        new_atoms, new_compounds = sample_set_to_update.update(
            sample_id, self.full_sample_set, inplace=False)

        # Pair up (atom, compound) distributions per role; the set being
        # updated contributes the hypothetical values, the other its current.
        if sample_set_to_update.is_train:
            train = (new_atoms, new_compounds)
            test = (other_sample_set.atom_distribution,
                    other_sample_set.compound_distribution)
        else:
            test = (new_atoms, new_compounds)
            train = (other_sample_set.atom_distribution,
                     other_sample_set.compound_distribution)

        train_atoms, train_compounds = train
        test_atoms, test_compounds = test
        return self.score(train_atoms, test_atoms, train_compounds, test_compounds)
def peek_ray(sample_id: str, sample_set_to_update: SplitSampleSet,
                         other_sample_set: SplitSampleSet, 
                         dbca_config: DBCASplitterConfig, full_sample_set: FullSampleSet) -> float:
    """
    Score the split obtained by hypothetically adding `sample_id` to
    `sample_set_to_update`; nothing is updated in place.

    Standalone function: the full sample set and the configuration are passed
    in explicitly, and scoring is delegated to the module-level `score`.
    NOTE(review): the name suggests this is meant to run as a Ray task --
    confirm against the caller.

    Args:
        sample_id (str): id of the candidate sample.
        sample_set_to_update (SplitSampleSet): Sample set that would receive
            the sample; its `is_train` flag selects the train/test role.
        other_sample_set (SplitSampleSet): The unchanged sample set, whose
            current distributions fill the opposite role.
        dbca_config (DBCASplitterConfig): Forwarded unchanged to `score`.
        full_sample_set (FullSampleSet): Forwarded to `update` together with
            `sample_id`.

    Returns:
        float: Value returned by `score` for the hypothetical split.
    """
    projected_atoms, projected_compounds = sample_set_to_update.update(
        sample_id, full_sample_set, inplace=False)

    if not sample_set_to_update.is_train:
        # The updated set plays the test role; the other set is used as train.
        fixed_atoms = other_sample_set.atom_distribution
        fixed_compounds = other_sample_set.compound_distribution
        return score(fixed_atoms, projected_atoms,
                     fixed_compounds, projected_compounds,
                     dbca_config)

    # The updated set plays the train role; the other set is used as test.
    fixed_atoms = other_sample_set.atom_distribution
    fixed_compounds = other_sample_set.compound_distribution
    return score(projected_atoms, fixed_atoms,
                 projected_compounds, fixed_compounds,
                 dbca_config)
示例#4
0
def peek_ray(sample_id: str, sample_set_to_update: SplitSampleSet,
             other_sample_set: SplitSampleSet, dbca_config: DBCASplitterConfig,
             full_sample_set: FullSampleSet) -> float:
    """
    Return the split score for a trial addition of `sample_id` to
    `sample_set_to_update`, leaving the sample set itself unmodified
    (`update` is called with `inplace=False`).

    Args:
        sample_id (str): id of the sample being tried.
        sample_set_to_update (SplitSampleSet): Set the sample would join; its
            `is_train` flag decides whether its trial distributions are
            scored as train or as test.
        other_sample_set (SplitSampleSet): The set left as-is; its current
            atom and compound distributions take the remaining role.
        dbca_config (DBCASplitterConfig): Passed through to `score`.
        full_sample_set (FullSampleSet): Passed to `update` alongside
            `sample_id`.

    Returns:
        float: Whatever `score(train_atoms, test_atoms, train_compounds,
        test_compounds, dbca_config)` returns.
    """
    trial_atoms, trial_compounds = sample_set_to_update.update(
        sample_id, full_sample_set, inplace=False)

    # In the pairs below, slot 0 holds the trial (updated) distribution and
    # slot 1 the other set's current one; pick which slot is train vs. test.
    train_slot, test_slot = (0, 1) if sample_set_to_update.is_train else (1, 0)

    atom_pair = (trial_atoms, other_sample_set.atom_distribution)
    compound_pair = (trial_compounds, other_sample_set.compound_distribution)

    return score(atom_pair[train_slot], atom_pair[test_slot],
                 compound_pair[train_slot], compound_pair[test_slot],
                 dbca_config)