Exemplo n.º 1
0
def test_subset_with_no_seed():
    """Check that two unseeded draws from the same dataset are not equal."""
    population = list(np.random.rand(100))

    # Both draws use the default (unseeded) behaviour of get_random_subset.
    first_draw, second_draw = (get_random_subset(population, 10) for _ in range(2))

    assert first_draw != second_draw
Exemplo n.º 2
0
def test_subset_with_random_seed():
    """Check that equal seeds give equal subsets and different seeds do not."""
    population = list(np.random.rand(100))

    def draw(seed):
        # Every draw takes 10 items from the same population.
        return get_random_subset(population, 10, seed=seed)

    baseline, same_seed, other_seed = draw(33), draw(33), draw(43)

    assert baseline == same_seed
    assert baseline != other_seed
Exemplo n.º 3
0
def test_subset():
    """Check that every item of a drawn subset comes from the dataset."""
    population = list(np.random.rand(100))

    drawn = get_random_subset(population, 10)

    # Collect offenders so a failure shows which items were not in the dataset.
    not_in_population = [item for item in drawn if item not in population]
    assert not not_in_population
Exemplo n.º 4
0
    def __init__(
        self,
        training_set: List[str],
        chemnet_model_filename='ChemNet_v0.13_pretrained.h5',
        sample_size=10000,
    ) -> None:
        """
        Store the benchmark configuration and draw the reference molecules.

        Args:
            training_set: molecules from the training set
            chemnet_model_filename: name of the file for trained ChemNet model.
                Must be present in the 'fcd' package, since it will be loaded directly from there.
            sample_size: how many molecules to generate the distribution statistics from (both reference data and model)
        """
        # Set the attributes first: the base-class initializer is given
        # self.sample_size as its number of samples.
        self.chemnet_model_filename = chemnet_model_filename
        self.sample_size = sample_size

        super().__init__(
            name='Frechet ChemNet Distance',
            number_samples=self.sample_size,
        )

        # The seed is fixed (42) for the reference draw from the training set.
        reference_sample = get_random_subset(training_set, self.sample_size, seed=42)
        self.reference_molecules = reference_sample
 def __init__(self, number_samples: int, training_set: List[str]) -> None:
     """
     Initialize the benchmark and prepare the training-set sample.

     Args:
         number_samples: number of samples to generate from the model
         training_set: molecules from the training set
     """
     super().__init__(name='KL divergence', number_samples=number_samples)

     # Draw with a fixed seed (42); the sample size is read back from the
     # instance after the base-class initializer has run.
     sampled_molecules = get_random_subset(training_set, self.number_samples, seed=42)
     self.training_set_molecules = canonicalize_list(
         sampled_molecules, include_stereocenters=False)

     # Descriptor names kept on the instance; where they are consumed is
     # not visible in this block.
     self.pc_descriptor_subset = [
         'BertzCT',
         'MolLogP',
         'MolWt',
         'TPSA',
         'NumHAcceptors',
         'NumHDonors',
         'NumRotatableBonds',
         'NumAliphaticRings',
         'NumAromaticRings',
     ]
Exemplo n.º 6
0
def test_subset_if_dataset_too_small():
    """Check that asking for more items than the dataset holds raises."""
    undersized_population = list(np.random.rand(100))
    requested_size = 1000  # larger than the 100 items available

    with pytest.raises(Exception):
        get_random_subset(undersized_population, requested_size)