Exemplo n.º 1
0
def score(s, target):
    """Return the Tanimoto similarity between two molecules given as SMILES.

    Both strings are parsed with ``Chem.MolFromSmiles`` and fingerprinted
    with ``get_FCFP6`` before being compared.

    Args:
        s: SMILES string of the candidate molecule.
        target: SMILES string of the reference molecule.

    Returns:
        Whatever ``TanimotoSimilarity`` returns for the two fingerprints
        (target fingerprint first, candidate second).
    """
    # Parse both inputs first, then fingerprint, target before candidate.
    target_mol, candidate_mol = Chem.MolFromSmiles(target), Chem.MolFromSmiles(s)
    target_fp, candidate_fp = get_FCFP6(target_mol), get_FCFP6(candidate_mol)
    return TanimotoSimilarity(target_fp, candidate_fp)
Exemplo n.º 2
0
def ttx_score(s, target):
  """Return the ECFP4 Tanimoto similarity between ``s`` and ``target``.

  Args:
      s: Candidate molecule; handed to ``get_ECFP4`` as-is (it is not
          parsed from SMILES here).
      target: SMILES string of the reference molecule; parsed with
          ``Chem.MolFromSmiles``.

  Returns:
      Whatever ``TanimotoSimilarity`` returns for the two fingerprints
      (reference fingerprint first, candidate second).
  """
  reference_fp = get_ECFP4(Chem.MolFromSmiles(target))
  candidate_fp = get_ECFP4(s)
  return TanimotoSimilarity(reference_fp, candidate_fp)
Exemplo n.º 3
0
def similarity(individual):
    """Score an individual by its dissimilarity to a hard-coded reference.

    The values of ``individual`` are scattered, in order, into a 256-element
    vector at the positions contained in the module-level ``non_zero_index``
    (all other positions stay ``0.0``). The vector is decoded to a SMILES
    string with ``latent_to_smiles`` and the resulting molecule is compared
    to the reference SMILES via Avalon fingerprints.

    Args:
        individual: Iterable of latent values, one per entry of
            ``non_zero_index`` that falls in ``range(256)``.

    Returns:
        A one-element tuple ``(1 - tanimoto_similarity,)``. The penalty
        ``(9999,)`` is returned when the decoded SMILES does not yield a
        molecule, is shorter than two characters, or when fingerprinting /
        comparison raises.
    """
    latent_values = list(individual)
    final_vector = [0.0] * 256
    counter = 0
    for i in range(256):
        if i in non_zero_index:
            final_vector[i] = latent_values[counter]
            counter += 1

    final_vector = np.reshape(final_vector, (1, 256))
    smiles = latent_to_smiles(charset,
                              smiles_len,
                              char_to_int,
                              int_to_char,
                              latent_to_states_model,
                              sample_model,
                              final_vector,
                              type='2_layers')
    molecule = Chem.MolFromSmiles(smiles)

    # The original test used `smiles is not ''`, an identity comparison
    # against a literal; a plain length check rejects the same empty and
    # single-character strings without relying on string interning.
    if not molecule or len(smiles) <= 1:
        return (9999,)

    try:
        mol_fp = GetAvalonFP(molecule, 512)
        ref = GetAvalonFP(
            Chem.MolFromSmiles('CC(C)Cc1ccc(cc1)[C@@H](C)C(=O)O'), 512)
        dissimilarity_to_ref = (1 - TanimotoSimilarity(mol_fp, ref))
        print(Chem.MolToSmiles(molecule))
        print(dissimilarity_to_ref)
        return (dissimilarity_to_ref,)
    except Exception:
        # Best-effort scoring: any failure maps to the penalty value, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return (9999,)
Exemplo n.º 4
0
 def get_best_difference_score(self, target_fp, metric):
     """Compute the Tanimoto similarity of ``target_fp`` to every stored entry.

     Args:
         target_fp: Fingerprint to compare against the stored ones.
         metric: Accepted but not used by this implementation.

     Returns:
         Dict mapping each key of ``self.fingerprints`` to the
         ``TanimotoSimilarity`` between ``target_fp`` and the second element
         of the value stored under that key.
     """
     def _similarity_for(rxn_id):
         # Each stored value is a 2-item pair; only the second item is used.
         _, candidate_fp = self.fingerprints[rxn_id]
         return TanimotoSimilarity(target_fp, candidate_fp)

     return {rxn_id: _similarity_for(rxn_id) for rxn_id in self.fingerprints}
Exemplo n.º 5
0
 def __call__(self, molecule):
     """
     Update the fitness value of a molecule and return the molecule.

     The molecule's ``smiles`` attribute is canonicalised with
     ``Chem.CanonSmiles``, parsed, fingerprinted with
     ``self.get_fingerprint`` (using ``self.fingerprint_type``) and compared
     to ``self.target_fingerprint`` with ``TanimotoSimilarity``.

     Args:
         molecule: Object exposing a ``smiles`` attribute; its ``fitness``
             attribute is overwritten in place.

     Returns:
         The same ``molecule`` object, with ``fitness`` set. (The previous
         ``-> None`` annotation contradicted this and was removed.)
     """
     canonical_smiles = Chem.CanonSmiles(molecule.smiles)
     molecular_graph = Chem.MolFromSmiles(canonical_smiles)
     molecule_fingerprint = self.get_fingerprint(molecular_graph, self.fingerprint_type)
     molecule.fitness = TanimotoSimilarity(self.target_fingerprint, molecule_fingerprint)
     return molecule
Exemplo n.º 6
0
def get_fp_scores(smiles_back, target_smi):
    """Return ECFP4 Tanimoto similarities of several SMILES to one target.

    Args:
        smiles_back: Iterable of SMILES strings to score.
        target_smi: SMILES string of the reference structure.

    Returns:
        List with one ``TanimotoSimilarity`` value per entry of
        ``smiles_back``, in the same order.
    """
    fp_target = get_ECFP4(Chem.MolFromSmiles(target_smi))
    return [
        TanimotoSimilarity(get_ECFP4(Chem.MolFromSmiles(smi)), fp_target)
        for smi in smiles_back
    ]
Exemplo n.º 7
0
 def __call__(self, molecule: Chem.Mol) -> float:
     """Return the fitness of ``molecule``, caching results by SMILES.

     The SMILES produced by ``Chem.MolToSmiles`` is used as the key into
     ``self.memoized_cache``. On a miss the molecule is fingerprinted with
     ``self.get_fingerprint`` and compared to ``self.target_fingerprint``;
     the result is stored before being returned.
     """
     cache_key = Chem.MolToSmiles(molecule)

     # Cache hit: reuse the stored value.
     if cache_key in self.memoized_cache:
         return self.memoized_cache[cache_key]

     candidate_fp = self.get_fingerprint(molecule, self.fingerprint_type)
     computed = TanimotoSimilarity(self.target_fingerprint, candidate_fp)
     self.memoized_cache[cache_key] = computed
     return computed
Exemplo n.º 8
0
 def get_best_difference_score(self, smarts, metric):
     """Score every stored fingerprint against the one derived from ``smarts``.

     Args:
         smarts: Passed to ``ReactionFingerprintMatcher.make_fingerprints``;
             the second element of its result is used as the target
             fingerprint.
         metric: Accepted but not used by this implementation.

     Returns:
         Tuple ``(scores, best)`` where ``scores`` maps each key of
         ``self.fingerprints`` to its ``TanimotoSimilarity`` with the target
         and ``best`` is the largest score seen (``0`` when no score
         exceeds zero).
     """
     _, target_fp = ReactionFingerprintMatcher.make_fingerprints(smarts)

     best_so_far = 0
     similarity_by_rxn = {}
     for rxn_id in self.fingerprints:
         _, candidate_fp = self.fingerprints[rxn_id]
         current = TanimotoSimilarity(target_fp, candidate_fp)
         similarity_by_rxn[rxn_id] = current
         # max() keeps the first argument on ties, matching a strict ">" update.
         best_so_far = max(best_so_far, current)
     return similarity_by_rxn, best_so_far
def check_file(filename, ref=ref, limit=100000):
    """Screen one input file for molecules similar to a reference fingerprint.

    The file suffix selects the processing mode:

    * ``.smiles`` -- every line is passed to ``process_row_str`` together
      with ``ref``; non-``None`` results are collected and returned.
      Reading stops after the line whose zero-based index reaches ``limit``.
    * ``.pkl`` -- the blob is loaded with ``read_blob``; every
      ``(idx, fp, smiles)`` entry is compared to ``ref`` and hits at or above
      ``args.tanimoto_threshold`` are written through a ``DumpsResults``
      instance in batches of 1000, with a final flush at the end.

    Args:
        filename: Path of the file to screen.
        ref: Reference fingerprint passed to ``TanimotoSimilarity``.
        limit: Zero-based index of the last line processed (``.smiles`` only).

    Returns:
        List of result strings for ``.smiles`` files; ``None`` for ``.pkl``
        files (results are written via ``DumpsResults`` instead).

    Raises:
        ValueError: If the file suffix is neither ``.smiles`` nor ``.pkl``.
    """
    results = []
    path = pathlib.Path(filename)
    file_type = path.suffix
    if file_type == ".smiles":
        with open(filename) as f:
            for line_no, row in enumerate(f):
                try:
                    # Forward `ref` so a caller-supplied reference is honoured
                    # (it was silently ignored in this branch before).
                    result = process_row_str(row, ref=ref)
                except Exception:
                    print(f"Failed to read line {line_no}")
                else:
                    # NOTE: the former periodic `dump(results)` here referred
                    # to a local only bound in the ".pkl" branch, so it always
                    # raised UnboundLocalError, which the bare except reported
                    # as a read failure. All hits are simply accumulated and
                    # returned, which is what effectively happened before.
                    if result is not None:
                        results.append(result)
                if line_no >= limit:
                    break
        return results
    elif file_type == ".pkl":
        blob_id = find_int(filename)
        fname_result = 'results_blob_{:02}'.format(blob_id)
        subfolder = check_for_part_in_path(path)
        try:
            # DumpsResults raises FileExistsError when the output is already
            # present (handled below), which aborts processing of this blob.
            dumper = DumpsResults(folder=os.path.join(args.reference_mol,
                                                      subfolder),
                                  path=outpath,
                                  fname=fname_result,
                                  overwrite=args.force)
            fp_in_mem = read_blob(filename)
            for entry in fp_in_mem:
                try:
                    idx, fp, smiles = entry
                    tanimoto_sim = TanimotoSimilarity(ref, fp)
                    if tanimoto_sim >= args.tanimoto_threshold:
                        result = make_string(smiles, idx, tanimoto_sim)
                        results.append(result)
                        # Save results every 1000 found molecules.
                        if len(results) >= 1000:
                            dumper(results)
                            results = []
                except TypeError as e:
                    print(e)
            # Flush whatever is left after the last full batch.
            dumper(results)
            print(f"Checked for {len(fp_in_mem)} molecules.")
        except FileExistsError:
            # Fixed: a stray "f" inside the f-string was printed literally.
            print(f"Files for blob already exist: {filename} ")
    else:
        raise ValueError("Filetype unkown: {}".format(file_type))
    return None
Exemplo n.º 10
0
def rediscovery(mol, args):
    """Return the ECFP4 Tanimoto similarity between ``mol`` and a target.

    Args:
        mol: Candidate molecule, passed to ``get_ECFP4``.
        args: Sequence whose first element is the target molecule.

    Returns:
        The ``TanimotoSimilarity`` of the two fingerprints, or ``None`` when
        fingerprinting or comparison raises (a message is printed first).
    """
    target = args[0]
    try:
        fp_mol = get_ECFP4(mol)
        fp_target = get_ECFP4(target)
        return TanimotoSimilarity(fp_mol, fp_target)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt / SystemExit
        # still propagate; ordinary failures remain best-effort.
        print('Failed ', Chem.MolToSmiles(mol))
        return None
def process_row_str(row: str, ref=ref):
    """
    Score one tab-separated row against the reference fingerprint.

    Inputs:
        row: str
        First two tab-separated entries of the string are the
        SMILES and ID of the row; any further entries are ignored.
        ref:
        Reference fingerprint passed to ``TanimotoSimilarity``.
    Return:
        str or None:
        If the similarity reaches ``args.tanimoto_threshold``, the value of
        ``make_string(smiles, id, tanimoto_coef)`` is returned; otherwise
        ``None``.
    Raises:
        ValueError: if ``row`` contains no tab (unpacking fails).
    """
    smiles, mol_id, *_ = row.split("\t")
    mol = Chem.MolFromSmiles(smiles)
    fp = Fingerprint(mol)  # this takes the bulk of time
    tanimoto_sim = TanimotoSimilarity(ref, fp)
    if tanimoto_sim >= args.tanimoto_threshold:
        # The message used to interpolate an undefined name `line`, which
        # raised NameError on every hit; report the row text instead.
        print(f"Added Molecule to results from line: {row.rstrip()}")
        return make_string(smiles, mol_id, tanimoto_sim)
    return None
Exemplo n.º 12
0
def rediscovery(mol,target):
  """Return the ECFP4 Tanimoto similarity of ``mol`` to ``target``.

  Side effects on module-level state: ``count`` is incremented for every
  successfully scored molecule, and ``max_score`` is replaced by
  ``[score, smiles]`` whenever the score exceeds ``max_score[0]``.

  Args:
      mol: Candidate molecule, passed to ``get_ECFP4``.
      target: Target molecule, passed to ``get_ECFP4``.

  Returns:
      The similarity score, or ``None`` when scoring raises (a message is
      printed first).
  """
  global max_score
  global count
  try:
    fp_mol = get_ECFP4(mol)
    fp_target = get_ECFP4(target)

    score = TanimotoSimilarity(fp_mol, fp_target)

    count += 1
    if score > max_score[0]:
      max_score = [score, Chem.MolToSmiles(mol)]

    return score

  except Exception:
    # Narrowed from a bare except so KeyboardInterrupt / SystemExit
    # still propagate; ordinary failures remain best-effort.
    print('Failed ',Chem.MolToSmiles(mol))
    return None
Exemplo n.º 13
0
def get_fp_scores(smiles_back, target_smi): 
    '''Calculate the Tanimoto fingerprint (ECFP4 fingerprint) similarity between a list 
       of SMILES and a known target structure (target_smi). 
       
    Parameters:
    smiles_back   (list) : A list of SMILES strings 
    target_smi (string)  : A valid SMILES string. Each SMILES in 'smiles_back' will be compared to this structure
    
    Returns: 
    smiles_back_scores (list) : One entry per input SMILES, in order: the fingerprint 
                                similarity (float), or None when the SMILES could not be parsed
    '''
    smiles_back_scores = []
    target    = Chem.MolFromSmiles(target_smi)
    fp_target = get_ECFP4(target)
    for item in smiles_back:
        # RDKit signals an unparsable SMILES by returning None rather than
        # raising, so both outcomes are treated as "invalid" here. Previously
        # the except branch fell through and scored a stale (or unbound) mol.
        try:
            mol = Chem.MolFromSmiles(item)
        except Exception:
            mol = None
        if mol is None:
            print('Invalid smile: ', item)
            # Keep the output aligned with the input list.
            smiles_back_scores.append(None)
            continue
        fp_mol = get_ECFP4(mol)
        score  = TanimotoSimilarity(fp_mol, fp_target)
        smiles_back_scores.append(score)
    return smiles_back_scores
Exemplo n.º 14
0
def problem_drug_likeness(individual):
    """Evaluate an individual on two objectives after decoding it to SMILES.

    The values of ``individual`` are scattered, in order, into a 256-element
    vector at the positions contained in the module-level ``non_zero_index``
    (all other positions stay ``0.0``) and decoded with ``latent_to_smiles``.

    Args:
        individual: Iterable of latent values, one per entry of
            ``non_zero_index`` that falls in ``range(256)``.

    Returns:
        Tuple ``(bad_drug, dissimilarity_to_ref)`` where ``bad_drug`` is
        ``(1.575 - logP)**2 + calculateScore(molecule)`` and
        ``dissimilarity_to_ref`` is one minus the Tanimoto similarity of the
        radius-2 Morgan bit vectors of the molecule and of ``'c1ccccc1'``.
        ``(9999, 9999)`` is returned when the SMILES does not yield a
        molecule or when any of the calculations raises.
    """
    latent_values = list(individual)
    final_vector = [0.0] * 256
    counter = 0
    for i in range(256):
        if i in non_zero_index:
            final_vector[i] = latent_values[counter]
            counter += 1

    final_vector = np.reshape(final_vector, (1, 256))
    smiles = latent_to_smiles(charset,
                              smiles_len,
                              char_to_int,
                              int_to_char,
                              latent_to_states_model,
                              sample_model,
                              final_vector,
                              type='2_layers')
    molecule = Chem.MolFromSmiles(smiles)
    if not molecule:
        return 9999, 9999

    try:
        logP = Descriptors.MolLogP(molecule)
        logP_score = (1.575 - logP)**2
        SA_score = calculateScore(molecule)
        print(Chem.MolToSmiles(molecule))
        bad_drug = logP_score + SA_score
        mol_fp = AllChem.GetMorganFingerprintAsBitVect(molecule, 2)
        ref = AllChem.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles('c1ccccc1'), 2)
        dissimilarity_to_ref = (1 - TanimotoSimilarity(mol_fp, ref))
        print((bad_drug, dissimilarity_to_ref))
        return bad_drug, dissimilarity_to_ref
    except Exception:
        # Best-effort scoring: any failure maps to the penalty values, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return 9999, 9999
Exemplo n.º 15
0
def get_fp_scores(smiles_back, target_smi, fp_type):
    '''Score each SMILES in a list by Tanimoto similarity to a target structure,
       using the fingerprint selected by fp_type.

    Parameters:
    smiles_back   (list) : SMILES strings to score
    target_smi (string)  : SMILES string of the reference structure
    fp_type (string)     : Fingerprint type, forwarded to get_fingerprint
                           (choices: AP/PHCO/BPF,BTF,PAT,ECFP4,ECFP6,FCFP4,FCFP6)

    Returns:
    (list) : One similarity value per entry of smiles_back, in the same order
    '''
    reference_fp = get_fingerprint(Chem.MolFromSmiles(target_smi), fp_type)

    def _score_one(smi):
        # Parse, fingerprint, then compare against the shared reference.
        candidate_fp = get_fingerprint(Chem.MolFromSmiles(smi), fp_type)
        return TanimotoSimilarity(candidate_fp, reference_fp)

    return [_score_one(smi) for smi in smiles_back]
 def _get_tanimoto_similarity(self, smiles1, smiles2):
     """Return the Tanimoto similarity of two SMILES strings.

     Each string is converted with ``self._smiles2fp`` (first ``smiles1``,
     then ``smiles2``) and the two results are handed to
     ``TanimotoSimilarity`` in that order.
     """
     fingerprints = [self._smiles2fp(smi) for smi in (smiles1, smiles2)]
     return TanimotoSimilarity(*fingerprints)
Exemplo n.º 17
0
    def multiprocess_find_similarity(self, _query_fp, _ref_fp, _ref_smi):
        """Return the rounded Tanimoto similarity keyed by the reference SMILES.

        Args:
            _query_fp: Fingerprint of the query.
            _ref_fp: Fingerprint of the reference.
            _ref_smi: Key under which the result is stored.

        Returns:
            ``{_ref_smi: {'tanimoto': value}}`` where ``value`` is the
            similarity rounded to three decimal places.
        """
        rounded_similarity = round(TanimotoSimilarity(_query_fp, _ref_fp), 3)
        return {_ref_smi: {'tanimoto': rounded_similarity}}
Exemplo n.º 18
0
 def score_mol(self, mol: Chem.Mol) -> float:
     """Return the Tanimoto similarity of ``mol`` to ``self.ref_fp``.

     ``mol`` is fingerprinted with ``get_fingerprint`` using
     ``self.fp_type`` before the comparison.
     """
     candidate_fp = get_fingerprint(mol, self.fp_type)
     similarity_to_ref = TanimotoSimilarity(candidate_fp, self.ref_fp)
     return similarity_to_ref
Exemplo n.º 19
0
# Script: score the first `rows` entries of ZINC_250k.smi by ECFP4 Tanimoto
# similarity to a target molecule and sort them from most to least similar.

# `Celecoxib` is defined outside this snippet -- presumably a SMILES string,
# since it is fed to MolFromSmiles; verify at the definition site.
target = Chem.MolFromSmiles(Celecoxib)
fp_target = sc.get_ECFP4(target)

pd.set_option('max_colwidth',200)
# The file is read without a header row and with a single space as separator;
# the frame is then given one column named "smiles".
df = pd.read_csv('ZINC_250k.smi', sep=" ", header=None)
df.columns = ["smiles"]

# Number of leading rows to score.
rows = 1000
scores = []
for index, row in df.iloc[0:rows].iterrows():
	smiles = row['smiles']
	#print(smiles)
	mol = Chem.MolFromSmiles(smiles)
	#print(mol,Chem.MolToSmiles(mol))
	fp_mol = sc.get_ECFP4(mol)
	score = TanimotoSimilarity(fp_mol,fp_target)
	#print(score)
	# One score per processed row, in row order.
	scores.append(score)

df2 = df.iloc[0:rows]
#print(df2)
#print(pd.DataFrame(scores, columns=['scores']))

# Wrap the scores in their own frame so they can be joined onto the SMILES.
df3 = pd.DataFrame(scores, columns=['score'])

# NOTE(review): the join matches on index labels; this lines up row-for-row
# only if df kept the default 0..n-1 index from read_csv -- confirm.
df2 = df2.join(df3['score'])

# Highest similarity first; df2 was rebound to the join result above, so the
# in-place sort acts on that new frame rather than on a slice of df.
df2.sort_values(by=['score'], ascending=False, inplace=True)

#print(df2)