def test_builder_returns_false_when_smiles_already_present():
    """Appending a SMILES the builder already holds must give a falsy result."""
    builder = OptimizedMolecules.Builder()
    builder.append('C', col1=1)  # first insertion of 'C'

    # Same SMILES again, this time with a different column.
    was_added = builder.append('C', col2=2)

    assert not was_added
Exemplo n.º 2
0
def test_to_csv_call_with_without_columns_and_columns_raises(tmp_path):
    """``to_csv`` must raise ValueError when both ``columns`` and ``without_columns`` are passed."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0, col2=1)
    molecules = molecules_builder.build()

    with pytest.raises(ValueError):
        # Both selectors are supplied at once, each as an empty list.
        molecules.to_csv('dummy_path', columns=[], without_columns=[])
def test_top_fraction_raises_with_fraction_greater_than_one():
    """A fraction above 1 passed to ``get_first_fraction`` must raise ValueError."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0)
    molecules = molecules_builder.build()

    too_large_fraction = 1.2
    with pytest.raises(ValueError):
        molecules.get_first_fraction(too_large_fraction, by_column='col1')
def test_first_n_raises_with_zero_n():
    """Requesting zero molecules from ``get_first_n`` must raise ValueError."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0)
    molecules = molecules_builder.build()

    requested_count = 0
    with pytest.raises(ValueError):
        molecules.get_first_n(requested_count, by_column='col1')
def test_first_fraction_raises_with_negative_fraction():
    """A negative fraction passed to ``get_first_fraction`` must raise ValueError."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0)
    molecules = molecules_builder.build()

    negative_fraction = -1
    with pytest.raises(ValueError):
        molecules.get_first_fraction(negative_fraction, by_column='col1')
def test_most_similar_tanimoto_multiple_elements_happy_path():
    """Check ``most_similar_tanimoto`` output for three stored SMILES against two references."""
    molecules_builder = OptimizedMolecules.Builder()
    for smiles in ('C', 'Cl', 'F'):
        molecules_builder.append(smiles, col1=0)
    molecules = molecules_builder.build()

    # One row per stored SMILES; the row order here is part of the expectation.
    expected_rows = {
        'C': {'tanimoto_similarity': 1.0, 'most_similar_smiles': 'C'},
        'F': {'tanimoto_similarity': 1.0, 'most_similar_smiles': 'F'},
        'Cl': {'tanimoto_similarity': 0.0, 'most_similar_smiles': 'C'},
    }
    expected_frame = pd.DataFrame.from_dict(expected_rows, orient='index')

    similarity_frame = molecules.most_similar_tanimoto(['C', 'F'])

    assert similarity_frame.equals(expected_frame)
def test_builder_in_operator_returns_true_when_smiles_already_present():
    """The ``in`` operator on a builder must be truthy for an appended SMILES."""
    builder = OptimizedMolecules.Builder()
    builder.append('C', col2=2)

    is_present = 'C' in builder

    assert is_present
def test_top_fraction_raises_with_non_existing_column():
    """``get_first_fraction`` must raise ValueError for a column that was never appended."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0)
    molecules = molecules_builder.build()

    with pytest.raises(ValueError):
        # Only 'col1' was stored, so this column name is unknown.
        molecules.get_first_fraction(1, by_column='no_such_column')
def test_rmse_raises_when_col2_contains_nans():
    """``rmse`` must raise ValueError when one molecule has no value for the second column."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=1)  # no col2 value for 'C'
    molecules_builder.append('H', col1=1, col2=2)
    molecules = molecules_builder.build()

    with pytest.raises(ValueError):
        molecules.rmse('col1', 'col2')
def test_rmse_raises_when_col2_not_in_molecules():
    """``rmse`` must raise ValueError when the second column name was never appended."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col=1)
    molecules_builder.append('H', col=2)
    molecules = molecules_builder.build()

    with pytest.raises(ValueError):
        # Only 'col' exists; the second argument names an unknown column.
        molecules.rmse('col', 'no_such_column')
def test_builder_does_not_calculate_validity_when_total_samples_is_zero():
    """Building without ever touching ``total_samples`` must not produce a 'validity' metric."""
    builder = OptimizedMolecules.Builder()
    builder.append('C', col1=1)
    builder.append('H', col2=2)

    built = builder.build()

    assert 'validity' not in built.metrics
Exemplo n.º 12
0
def test_builder_happy_path():
    """Two fully populated molecules must build into the matching index-oriented DataFrame."""
    expected_rows = {
        'C': {'col1': 1, 'col2': 2},
        'H': {'col1': 3, 'col2': 4},
    }
    expected_frame = pd.DataFrame.from_dict(expected_rows, orient='index')

    builder = OptimizedMolecules.Builder()
    builder.append('C', col1=1, col2=2)
    builder.append('H', col1=3, col2=4)

    built = builder.build()

    assert built.molecules.equals(expected_frame)
def test_rmse_no_error_happy_path():
    """``rmse`` over two columns holding identical values must equal 0."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0, col2=0)
    molecules_builder.append('H', col1=1, col2=1)
    molecules = molecules_builder.build()

    error = molecules.rmse('col1', 'col2')

    assert error == 0
def test_builder_calculates_validity_when_total_samples_positive():
    """With two molecules appended and ``total_samples`` raised by 10, 'validity' must be 0.2."""
    builder = OptimizedMolecules.Builder()
    builder.append('C', col1=1)
    builder.append('H', col2=2)
    builder.total_samples += 10

    built = builder.build()

    validity = built.metrics['validity']
    assert validity == 0.2
Exemplo n.º 15
0
def test_builder_happy_path_with_missing_data():
    """Molecules with disjoint columns must build into the matching sparse DataFrame."""
    expected_rows = {
        'C': {'col1': 1},
        'H': {'col2': 2},
    }
    expected_frame = pd.DataFrame.from_dict(expected_rows, orient='index')

    builder = OptimizedMolecules.Builder()
    builder.append('C', col1=1)  # only col1
    builder.append('H', col2=2)  # only col2

    built = builder.build()

    assert built.molecules.equals(expected_frame)
def test_top_n_happy_path_descending():
    """Half of two molecules, sorted descending by 'col1', must be the row with the larger value."""
    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0)
    molecules_builder.append('H', col1=1)
    molecules = molecules_builder.build()

    expected_frame = pd.DataFrame.from_dict({'H': {'col1': 1}}, orient='index')

    top_half = molecules.get_first_fraction(.5, by_column='col1', sort_ascending=False)

    assert top_half.equals(expected_frame)
def test_to_csv_happy_path(tmp_path):
    """``to_csv`` without column selectors must write a SMILES column followed by every data column."""
    expected_text = textwrap.dedent('''\
    SMILES,col1,col2
    C,0.0,1
    H,2.5,1
    ''')
    target_file = tmp_path / 'tmp_file.csv'

    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0, col2=1)
    molecules_builder.append('H', col1=2.5, col2=1)
    molecules = molecules_builder.build()

    molecules.to_csv(target_file)

    written_text = target_file.read_text()
    assert written_text == expected_text
Exemplo n.º 18
0
    def descent_steps(self, smiles_docking_score_fn, size):
        """Collect docked gradient-step trajectories that start from training samples.

        Each round draws a batch of rows from ``self.train`` without
        replacement, encodes it with ``self.cvae.encode`` and applies
        ``self.descent_iterations`` updates of
        ``self.mlp.gradient(latents) * self.descent_lr``. The encoded batch
        and the batch after every update are decoded to SMILES. For each batch
        element, every valid SMILES along its trajectory is scored with
        ``smiles_docking_score_fn`` and stored in a per-trajectory builder
        together with its latent vector and step index.

        Args:
            smiles_docking_score_fn: Callable invoked as
                ``fn(smiles, n_cpu=self.docking_n_cpu)``. Its result is
                unpacked with ``**`` into the builder, so it must be a mapping.
            size: Minimum number of trajectories to collect.

        Returns:
            A list of built ``OptimizedMolecules`` objects, one per trajectory
            whose builder ended with more than ``min_valid_steps`` entries.
            A started batch is always processed completely, so the list may
            hold more than ``size`` elements.

        Raises:
            AssertionError: If either argument is ``None``.
        """
        assert smiles_docking_score_fn is not None
        assert size is not None
        logger.info('Descent steps start')

        batch_size = 32
        min_valid_steps = 5

        results = []

        while len(results) < size:
            logger.info(f'Valid descent steps results: {len(results)} / {size}')
            one_hots = self.train[np.random.choice(self.train.shape[0], batch_size, replace=False), :]
            latents = self.cvae.encode(one_hots)
            # Snapshot the starting point. `latents` is updated in place below,
            # so storing the array itself would leave step 0 aliased to the
            # final latents instead of the encoded inputs.
            latent_changes = [np.copy(latents)]

            for _ in range(self.descent_iterations):
                latents += self.mlp.gradient(latents) * self.descent_lr
                latent_changes.append(np.copy(latents))

            # decoded_series[j][i] is the SMILES of batch element i at step j.
            decoded_series = [self.cvae.hot_to_smiles(self.cvae.decode(delta), strip=True, numpy=True)
                              for delta in latent_changes]

            smiles_changes = [
                [canonicalize(smi) for smi in smi_list]
                for smi_list in decoded_series
            ]

            for i in range(batch_size):
                # One builder per trajectory (batch element).
                descent_results = OptimizedMolecules.Builder()

                for j in range(self.descent_iterations + 1):
                    smi = smiles_changes[j][i]

                    if smi is not None and is_valid(smi):
                        try:
                            docking_score = smiles_docking_score_fn(smi, n_cpu=self.docking_n_cpu)
                            descent_results.append(
                                smi,
                                latent_vector=latent_changes[j][i],
                                step=j,
                                **docking_score
                            )
                        except (ValueError, RuntimeError, TypeError):
                            # Best effort: a failed step is logged and skipped.
                            logger.error('Docking failed for %s', smi)

                # Keep only trajectories with enough stored steps.
                if descent_results.size > min_valid_steps:
                    results.append(descent_results.build())

        return results
def test_to_csv_with_without_columns(tmp_path):
    """``to_csv`` with ``without_columns=['col2']`` must leave 'col2' out of the written file."""
    expected_text = textwrap.dedent('''\
    SMILES,col1
    C,0.0
    H,2.5
    ''')
    target_file = tmp_path / 'tmp_file.csv'

    molecules_builder = OptimizedMolecules.Builder()
    molecules_builder.append('C', col1=0, col2=1)
    molecules_builder.append('H', col1=2.5, col2=1)
    molecules = molecules_builder.build()

    molecules.to_csv(target_file, without_columns=['col2'])

    written_text = target_file.read_text()
    assert written_text == expected_text
Exemplo n.º 20
0
    def generate_optimized_molecules(self, number_molecules, smiles_docking_score_fn):
        """Sample latent points, move them along the MLP gradient, then decode and dock them.

        Batches of ``self.batch_size`` latent vectors are drawn with
        ``np.random.normal`` and updated ``self.descent_iterations`` times by
        ``self.mlp.gradient(latents) * self.descent_lr``. The result is decoded
        to canonical SMILES. Every SMILES that is valid, passes
        ``lipinski_filter`` and is not yet in the builder is docked and stored
        with its latent vector and predicted score.

        Args:
            number_molecules: Builder size at which generation stops.
            smiles_docking_score_fn: Callable invoked as
                ``fn(smiles, output_path=..., n_cpu=self.docking_n_cpu)``. Its
                result is unpacked with ``**`` into the builder.

        Returns:
            The object produced by ``OptimizedMolecules.Builder.build()``.
        """
        builder = OptimizedMolecules.Builder()

        while builder.size < number_molecules:
            logger.info(f'Generated {builder.size} / {number_molecules}')
            latents = np.random.normal(size=(self.batch_size, self.latent))

            # Scores of the untouched samples; only used for the log line below.
            before = [self.mlp.latent_score(row.reshape(1, -1)) for row in latents]

            for _ in range(self.descent_iterations):
                latents += self.mlp.gradient(latents) * self.descent_lr

            try:
                decoded = self.cvae.hot_to_smiles(self.cvae.decode(latents), strip=True, numpy=True)
                smiles = [canonicalize(candidate) for candidate in decoded]
            except (RuntimeError, ValueError):
                # A batch that fails to decode is dropped and a new one is sampled.
                logger.error('Decoding failed')
                continue

            for idx, smi in enumerate(smiles):
                try:
                    passes_filters = smi is not None and is_valid(smi) and lipinski_filter(smi)
                    if passes_filters and smi in builder:
                        logger.info('Generated SMILES %s already present in OptimizedMoleculesBuilder', smi)
                    elif passes_filters:
                        latent_score = self.mlp.latent_score(latents[idx].reshape(1, -1))
                        logger.info(f'Optimized from {before[idx]} to {latent_score}')
                        # The file name uses the builder size before this molecule is added.
                        if self.output_dir is not None:
                            output_path = os.path.join(self.output_dir, f'{builder.size}.mol2')
                        else:
                            output_path = None
                        docking_score = smiles_docking_score_fn(
                            smi,
                            output_path=output_path,
                            n_cpu=self.docking_n_cpu,
                        )
                        builder.append(
                            smi,
                            latent_vector=latents[idx],
                            predicted_score=latent_score,
                            **docking_score
                        )
                except Exception:
                    logger.error('Docking failed for ' + smi)

                if builder.size >= number_molecules:
                    logger.info('Generating finished')
                    break

            # The whole batch is counted, even when the loop above stopped early.
            builder.total_samples += self.batch_size

        return builder.build()
Exemplo n.º 21
0
    def random_gauss(self, smiles_docking_score_fn, size):
        """Dock molecules decoded from latent vectors drawn with ``np.random.normal``.

        Batches of ``self.batch_size`` latent vectors are sampled and decoded
        to canonical SMILES without any gradient step. Every SMILES that is
        valid, passes ``lipinski_filter`` and is not yet in the builder is
        docked and stored with its latent vector and the MLP's predicted score.

        Args:
            smiles_docking_score_fn: Callable invoked as
                ``fn(smiles, output_path=..., n_cpu=self.docking_n_cpu)``. Its
                result is unpacked with ``**`` into the builder.
            size: Builder size at which sampling stops.

        Returns:
            The object produced by ``OptimizedMolecules.Builder.build()``.

        Raises:
            AssertionError: If either argument is ``None``.
        """
        assert smiles_docking_score_fn is not None
        assert size is not None

        logger.info('Random gauss sampling start')

        builder = OptimizedMolecules.Builder()

        while builder.size < size:
            logger.info(f'Random sampled {builder.size} / {size}')
            latents = np.random.normal(size=(self.batch_size, self.latent))

            decoded = self.cvae.hot_to_smiles(self.cvae.decode(latents), strip=True, numpy=True)
            smiles = [canonicalize(candidate) for candidate in decoded]

            for idx, smi in enumerate(smiles):
                try:
                    passes_filters = smi is not None and is_valid(smi) and lipinski_filter(smi)
                    if passes_filters and smi in builder:
                        logger.info('Generated SMILES %s already present in OptimizedMoleculesBuilder', smi)
                    elif passes_filters:
                        # The file name uses the builder size before this molecule is added.
                        if self.output_dir is not None:
                            output_path = os.path.join(self.output_dir, f'{builder.size}.gauss.mol2')
                        else:
                            output_path = None
                        docking_score = smiles_docking_score_fn(
                            smi,
                            output_path=output_path,
                            n_cpu=self.docking_n_cpu,
                        )
                        predicted_score = self.mlp.latent_score(latents[idx].reshape(1, -1))
                        builder.append(
                            smi,
                            latent_vector=latents[idx],
                            predicted_score=predicted_score,
                            **docking_score
                        )
                except Exception:
                    logger.error('Docking failed for ' + smi)

                if builder.size >= size:
                    logger.info('Random sampling finished')
                    break

            # The whole batch is counted, even when the loop above stopped early.
            builder.total_samples += self.batch_size

        logger.info('Random gauss sampling finished')

        return builder.build()
def test_builder_should_raise_when_smiles_is_not_str():
    """Appending a non-string SMILES (here an int) must raise TypeError."""
    builder = OptimizedMolecules.Builder()
    not_a_string = 34

    with pytest.raises(TypeError):
        builder.append(not_a_string)
def test_builder_should_raise_on_empty_smiles():
    """Appending the empty string as SMILES must raise ValueError."""
    builder = OptimizedMolecules.Builder()
    empty_smiles = ''

    with pytest.raises(ValueError):
        builder.append(empty_smiles)
def test_builder_in_operator_returns_false_when_smiles_not_already_present():
    """The ``in`` operator on a fresh builder must be falsy for any SMILES."""
    builder = OptimizedMolecules.Builder()  # nothing appended

    assert 'C' not in builder
def test_to_csv_call_with_without_columns_and_columns_raises(tmp_path):
    """``to_csv`` on an empty build must raise ValueError when both column selectors are passed."""
    empty_molecules = OptimizedMolecules.Builder().build()

    with pytest.raises(ValueError):
        # Both selectors are supplied at once, each as an empty list.
        empty_molecules.to_csv('dummy_path', columns=[], without_columns=[])