Exemplo n.º 1
0
def test_change_constraints_cache_clear():
    """Check that new semantic constraints are reflected by the library.

    The test first records the robust alphabet and the decoding of
    ``"[C][#C]"``, then installs a copy of the current constraints with the
    ``"C"`` entry set to 1 and asserts that both the alphabet and the decoded
    string change.  (Per the test name this presumably exercises cache
    invalidation inside the library -- the cache itself is not visible here.)

    The constraints are process-global state, so they are reset in a
    ``finally`` clause: a failing assertion must not leave the custom
    constraints installed for whichever test runs next.
    """
    alphabet = sf.get_semantic_robust_alphabet()
    assert alphabet == sf.get_semantic_robust_alphabet()
    assert sf.decoder("[C][#C]") == "C#C"

    new_constraints = sf.get_semantic_constraints()
    new_constraints["C"] = 1
    sf.set_semantic_constraints(new_constraints)

    try:
        new_alphabet = sf.get_semantic_robust_alphabet()
        assert new_alphabet != alphabet
        assert sf.decoder("[C][#C]") == "CC"
    finally:
        # Called with no arguments to undo the change above, even when one
        # of the assertions fails.
        sf.set_semantic_constraints()  # re-set alphabet
Exemplo n.º 2
0
def large_alphabet():
    """Return the robust alphabet extended with a fixed set of extra symbols.

    The result is a list built from a set, so it contains no duplicates and
    its order is whatever the set iteration yields.
    """
    extra_symbols = (
        # "#"-prefixed symbols
        "[#Br]", "[#Branch1]", "[#Branch2]", "[#Branch3]",
        "[#C@@H1]", "[#C@@]", "[#C@H1]", "[#C@]", "[#C]", "[#Cl]",
        "[#F]", "[#H]", "[#I]", "[#NH1]", "[#N]", "[#O]", "[#P]",
        "[#Ring1]", "[#Ring2]", "[#Ring3]", "[#S]",
        # "/"-prefixed symbols
        "[/Br]", "[/C@@H1]", "[/C@@]", "[/C@H1]", "[/C@]", "[/C]", "[/Cl]",
        "[/F]", "[/H]", "[/I]", "[/NH1]", "[/N]", "[/O]", "[/P]", "[/S]",
        # "="-prefixed symbols
        "[=Br]", "[=Branch1]", "[=Branch2]", "[=Branch3]",
        "[=C@@H1]", "[=C@@]", "[=C@H1]", "[=C@]", "[=C]", "[=Cl]",
        "[=F]", "[=H]", "[=I]", "[=NH1]", "[=N]", "[=O]", "[=P]",
        "[=Ring1]", "[=Ring2]", "[=Ring3]", "[=S]",
        # unprefixed symbols
        "[Br]", "[Branch1]", "[Branch2]", "[Branch3]",
        "[C@@H1]", "[C@@]", "[C@H1]", "[C@]", "[C]", "[Cl]",
        "[F]", "[H]", "[I]", "[NH1]", "[N]", "[O]", "[P]",
        "[Ring1]", "[Ring2]", "[Ring3]", "[S]",
        # "\"-prefixed symbols
        "[\\Br]", "[\\C@@H1]", "[\\C@@]", "[\\C@H1]", "[\\C@]", "[\\C]",
        "[\\Cl]", "[\\F]", "[\\H]", "[\\I]", "[\\NH1]", "[\\N]", "[\\O]",
        "[\\P]", "[\\S]",
        # padding symbol
        "[nop]",
    )

    symbols = sf.get_semantic_robust_alphabet()
    symbols.update(extra_symbols)
    return list(symbols)
Exemplo n.º 3
0
def hard_alphabet():
    """Build a challenging alphabet of SELFIES symbols.

    Starts from the semantically robust alphabet and merges in a fixed
    collection of additional symbols; the merged set is returned as a list.
    """
    additional = (
        # "#"-prefixed symbols
        '[#Br]', '[#C@@Hexpl]', '[#C@@expl]', '[#C@Hexpl]', '[#C@expl]',
        '[#C]', '[#Cl]', '[#F]', '[#Hexpl]', '[#I]', '[#NHexpl]', '[#N]',
        '[#O]', '[#P]', '[#S]',
        # "/"-prefixed symbols
        '[/Br]', '[/C@@Hexpl]', '[/C@@expl]', '[/C@Hexpl]', '[/C@expl]',
        '[/C]', '[/Cl]', '[/F]', '[/Hexpl]', '[/I]', '[/NHexpl]', '[/N]',
        '[/O]', '[/P]', '[/S]',
        # "="-prefixed symbols
        '[=Br]', '[=C@@Hexpl]', '[=C@@expl]', '[=C@Hexpl]', '[=C@expl]',
        '[=C]', '[=Cl]', '[=F]', '[=Hexpl]', '[=I]', '[=NHexpl]', '[=N]',
        '[=O]', '[=P]', '[=S]',
        # unprefixed symbols, including branch and ring symbols
        '[Br]',
        '[Branch1_1]', '[Branch1_2]', '[Branch1_3]',
        '[Branch2_1]', '[Branch2_2]', '[Branch2_3]',
        '[Branch3_1]', '[Branch3_2]', '[Branch3_3]',
        '[C@@Hexpl]', '[C@@expl]', '[C@Hexpl]', '[C@expl]',
        '[C]', '[Cl]', '[Expl#Ring1]', '[Expl=Ring1]', '[F]', '[Hexpl]',
        '[I]', '[NHexpl]', '[N]', '[O]', '[P]',
        '[Ring1]', '[Ring2]', '[Ring3]', '[S]',
        # "\"-prefixed symbols
        '[\\Br]', '[\\C@@Hexpl]', '[\\C@@expl]', '[\\C@Hexpl]', '[\\C@expl]',
        '[\\C]', '[\\Cl]', '[\\F]', '[\\Hexpl]', '[\\I]', '[\\NHexpl]',
        '[\\N]', '[\\O]', '[\\P]', '[\\S]',
        # special symbols
        '[epsilon]', '[nop]',
    )

    symbols = sf.get_semantic_robust_alphabet()
    symbols.update(additional)
    return list(symbols)
Exemplo n.º 4
0
from rdkit import Chem
from selfies import encoder, decoder, get_semantic_robust_alphabet
import re
import random
import logging as logger


# Robust SELFIES symbols with the enclosing square brackets stripped off.
ALPHABET = [token.strip("[]") for token in get_semantic_robust_alphabet()]

# Rules describing which types of substitution should be allowed.
# NOTE(review): presumably each key is the symbol being replaced and the list
# holds its permitted replacements -- confirm against the code that reads it.
ALLOWED_SUBS = {
    # carbon
    "C": ["N", "O", "H"],
    "=C": ["=N", "N", "O", "=O", "S"],
    # halogens
    "F": ["Cl", "Br", "I", "O", "N", "C", "H"],
    "Cl": ["F", "Br", "I", "O", "N", "C", "H"],
    "Br": ["Cl", "F", "I", "O", "N", "C", "H"],
    "I": ["Cl", "Br", "F", "O", "N", "C", "H"],
    # hydrogen
    "H": ["C", "O", "N", "S", "=C", "=O", "=S"],
    # oxygen
    "O": [
        "S", "N", "=O", "=N", "C", "=C",
        "Cl", "F", "Br", "I", "H",
    ],
    "=O": ["=S", "=N", "=C", "O"],
    # nitrogen
    "N": ["O", "C", "H"],
    "=N": ["=O", "O", "S", "=C", "C"],
    "#N": ["#C"],
    # sulfur
    "S": ["O", "N", "C", "=O", "=N", "H"],
    "=S": ["=O", "=N", "=C", "O"],
    # "#"-prefixed carbon
    "#C": ["#N"],
}


def selfies_substitution(*,
                         parent_smiles: str,
Exemplo n.º 5
0
def mutate_selfie(selfie, max_molecules_len, write_fail_cases=False):
    '''Return a mutated SELFIES string (exactly one mutation is applied).

    A mutation of the input is drawn repeatedly until one yields a valid
    molecule.  Each attempt picks one of three operations uniformly at random:
        1. Insert a random SELFIES symbol at a random position
        2. Replace a random SELFIES symbol with another
        3. Delete a random symbol

    When the input contains no symbols, only insertion is attempted, since
    replace and delete need at least one symbol to act on.

    There is no retry limit: the loop runs until an attempt succeeds.

    Parameters:
    selfie            (string)  : SELFIES string to be mutated
    max_molecules_len (int)     : Mutants with more symbols than this are rejected
    write_fail_cases  (bool)    : If true, attempts that raise an error (from the
                                  second attempt on) are appended to
                                  "selfie_failure_cases.txt"

    Returns:
    selfie_mutated    (string)  : Mutated SELFIES string
    smiles_canon      (string)  : Canonical SMILES of the mutated SELFIES string
    '''
    chars_selfie = get_selfie_chars(selfie)
    # Only needed for the failure log; it does not change between attempts.
    parent_selfie = "".join(chars_selfie)
    # The alphabet is loop-invariant, so build it once up front.
    alphabet = list(selfies.get_semantic_robust_alphabet())

    # 1=Insert; 2=Replace; 3=Delete.  Replace/delete would ask the RNG for an
    # index in an empty range, so an empty input may only grow.
    choice_ls = [1, 2, 3] if len(chars_selfie) > 0 else [1]

    fail_counter = 0
    while True:
        fail_counter += 1
        random_choice = np.random.choice(choice_ls, 1)[0]

        if random_choice == 1:
            # Insert a symbol at a random location (may be the very end).
            random_index = np.random.randint(len(chars_selfie) + 1)
            random_character = np.random.choice(alphabet, size=1)[0]
            selfie_mutated_chars = (chars_selfie[:random_index] +
                                    [random_character] +
                                    chars_selfie[random_index:])
        elif random_choice == 2:
            # Replace a random symbol; slicing handles index 0 on its own.
            random_index = np.random.randint(len(chars_selfie))
            random_character = np.random.choice(alphabet, size=1)[0]
            selfie_mutated_chars = (chars_selfie[:random_index] +
                                    [random_character] +
                                    chars_selfie[random_index + 1:])
        else:
            # Delete a random symbol.
            random_index = np.random.randint(len(chars_selfie))
            selfie_mutated_chars = (chars_selfie[:random_index] +
                                    chars_selfie[random_index + 1:])

        selfie_mutated = "".join(selfie_mutated_chars)

        try:
            smiles = decoder(selfie_mutated)
            _, smiles_canon, done = sanitize_smiles(smiles)
        except Exception:
            # Decoding/sanitising failed: optionally log it and retry.
            # KeyboardInterrupt and SystemExit are deliberately not caught so
            # that the loop can still be interrupted.
            if write_fail_cases and fail_counter > 1:
                with open("selfie_failure_cases.txt", "a+") as f:
                    f.write('Tried to mutate SELFIE: ' + str(parent_selfie) +
                            ' To Obtain: ' + str(selfie_mutated) + '\n')
            continue

        # Accept only sanitised, non-empty molecules within the length limit.
        if (done and smiles_canon != ""
                and len(selfie_mutated_chars) <= max_molecules_len):
            return (selfie_mutated, smiles_canon)
Exemplo n.º 6
0
             c='black',
             markersize=7,
             linewidth=3)  # TARGETS

# Final plot cosmetics.  `clb`, `ax` and `fig` are created earlier in the
# script (clb is presumably the colorbar of the scatter plot -- confirm).
clb.set_label('Joint Similarity', fontsize=10)
ax.set_xlabel('LogP', fontsize=10)
ax.set_ylabel('QED', fontsize=10)
# Fix the x-axis range to [-4, 8].
plt.xlim([-4, 8])

ax.grid(True)
fig.tight_layout()
# plt.savefig('./logP_v_QED_scatter.png', dpi=1000)

# Display the figure.
plt.show()

# Symbols available for building random structures (the original note
# counted 34 symbols -- presumably depends on the selfies version; verify).
alphabet = list(selfies.get_semantic_robust_alphabet())

# Symbol counts of the starting and target molecules, computed once each and
# used as the upper and lower length bounds for the random structures.
endpoint_lengths = [
    len(get_selfie_chars(encoder(smiles)))
    for smiles in (starting_smile, target_smile)
]
max_len_random_struct = max(endpoint_lengths)
min_len_random_struct = min(endpoint_lengths)

# One random structure is generated per entry of logP_path.
num_samples = len(logP_path)
random_selfies = []

for _ in range(num_samples):
    selfie = ''

    for i in range(random.randint(min_len_random_struct, max_len_random_struct)