Exemplo n.º 1
0
def _parseMolData(data):
    """Parse SD data and regenerate 2D coordinates for coordinate-less entries.

    Every molecule that parses is kept. Molecules without the
    ``_drawingBondsWedged`` property are sanitized first. The mol block of
    each molecule is then inspected: when every line containing "0.0000"
    splits into more than three pieces on that token, the molecule is treated
    as having no real coordinates, so it is sanitized and RDKit computes a 2D
    layout for it. Otherwise the molecule is used as drawn.

    Returns the list of parsed (and possibly re-laid-out) molecules.
    """
    supplier = SDMolSupplier()
    supplier.SetData(str(data), sanitize=False)

    data = [mol for mol in supplier if mol]
    for mol in data:
        if not mol.HasProp("_drawingBondsWedged"):
            SanitizeMol(mol)

        # Keep only the mol block lines that mention a zero coordinate.
        zero_lines = [
            row.split("0.0000")
            for row in MolToMolBlock(mol).split("\n")
            if "0.0000" in row
        ]
        all_zero = sum(1 for pieces in zero_lines if len(pieces) > 3)

        # Every such line being all-zero means nothing was actually drawn.
        if all_zero == len(zero_lines):
            SanitizeMol(mol)
            Compute2DCoords(mol)
    return data
Exemplo n.º 2
0
 def parse_molblock(self, mb):
     """Return the first molecule parsed from molblock *mb*, or None."""
     supplier = SDMolSupplier()
     supplier.SetData(mb)
     first = next(supplier)
     # A falsy parse result is normalised to None.
     return first if first else None
Exemplo n.º 3
0
def rd_kit(dir_sdf="../data/sdf/"):
    """Plot fingerprint similarities of every SDF file against the first one.

    The directory is listed with ``ls``; the first listed file is the
    baseline. For every listed file the first molecule is fingerprinted with
    the Morgan, RDKit, Avalon and layered methods and the Tanimoto similarity
    to the baseline fingerprint of the same method is collected. The Morgan
    and RDKit similarities are shown as histograms; the mean RDKit similarity
    and the number of files are printed.

    Args:
        dir_sdf: Directory prefix (including the trailing separator) that is
            prepended verbatim to each listed file name.

    Raises:
        IndexError: If the directory listing is empty.
    """
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir_sdf).read().split()
    bit_length = 1024

    # Index each supplier once and reuse the molecule for all fingerprints.
    base_mol = SDMolSupplier(dir_sdf + file_names[0])[0]
    baseline_morgan = AllChem.GetMorganFingerprintAsBitVect(
        base_mol, 2, nBits=bit_length)
    baseline_rdk = AllChem.RDKFingerprint(base_mol, maxPath=2)
    baseline_aval = pyAvalonTools.GetAvalonFP(base_mol, 128)
    baseline_layer = AllChem.LayeredFingerprint(base_mol)

    sim_matrix_morgan = []
    sim_matrix_rdk = []
    sim_matrix_aval = []
    sim_matrix_layer = []

    def _tanimoto(reference, candidate):
        return DataStructs.FingerprintSimilarity(
            reference, candidate, metric=DataStructs.TanimotoSimilarity)

    count = 0
    for item in file_names:
        mol = SDMolSupplier(dir_sdf + item)[0]
        count += 1

        # NOTE(review): the baseline uses radius 2 / maxPath 2 while the
        # per-file fingerprints use radius 3 / maxPath 3 - confirm that this
        # mismatch is intended before relying on the similarities.
        fp_bit = AllChem.GetMorganFingerprintAsBitVect(
            mol, 3, nBits=bit_length)
        fp_rdk = AllChem.RDKFingerprint(mol, maxPath=3)
        fp_aval = pyAvalonTools.GetAvalonFP(mol, 128)
        fp_layer = AllChem.LayeredFingerprint(mol)

        sim_matrix_morgan.append(_tanimoto(baseline_morgan, fp_bit))
        sim_matrix_rdk.append(_tanimoto(baseline_rdk, fp_rdk))
        sim_matrix_aval.append(_tanimoto(baseline_aval, fp_aval))
        sim_matrix_layer.append(_tanimoto(baseline_layer, fp_layer))

    sim_matrix_morgan = np.array(sim_matrix_morgan)
    sim_matrix_rdk = np.array(sim_matrix_rdk)
    # Avalon and layered similarities are computed but currently not plotted.
    sim_matrix_aval = np.array(sim_matrix_aval)
    sim_matrix_layer = np.array(sim_matrix_layer)

    label_morgan = "morgan" + str(bit_length)
    plt.hist(sim_matrix_morgan, label=label_morgan)
    plt.hist(sim_matrix_rdk, label="rdk2")
    print(np.mean(sim_matrix_rdk))
    print(count)
    plt.xlabel("Similarity to Baseline")
    plt.ylabel("Counts")
    plt.title("Different Fingerprinting Methods, Similarity to Baseline")
    plt.legend()
    plt.show()
Exemplo n.º 4
0
def MCS_NN_search(sdf_file):
    """Render a molecule and its nearest neighbour with their MCS highlighted.

    The first molecule of *sdf_file* is the query. Its ``NN`` property is
    evaluated as a Python literal giving the ids of candidate molecules,
    which are loaded from ``<id>.sdf`` files in the same directory.

    Returns a tuple ``(mol_img, nn_img)`` as produced by ``draw_base64``.
    """
    session_dir = split(sdf_file)[0]
    query = SDMolSupplier(sdf_file, removeHs=True)[0]
    neighbour_ids = literal_eval(query.GetPropsAsDict()['NN'])

    neighbours = []
    for neighbour_id in neighbour_ids:
        neighbour_path = join(session_dir, '%d.sdf' % neighbour_id)
        neighbours.append(SDMolSupplier(neighbour_path, removeHs=True)[0])

    MCSs, MCS_matches, NN_mols, NN_MCS_matches = get_MCSs([query], neighbours)

    query_img = draw_base64(query, highlightAtoms=MCS_matches[0])
    neighbour_img = draw_base64(NN_mols[0], highlightAtoms=NN_MCS_matches[0])
    return query_img, neighbour_img
Exemplo n.º 5
0
def parse_sdf(contents, filename):
    """Parse the contents of an uploaded file as an SDF.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64>"``.
        filename: Name of the uploaded file; only names ending in ``.sdf``
            (case-insensitive) are parsed.

    Returns:
        A tuple ``(mols, message, session_id)``. ``mols`` is the list of
        molecules that parsed successfully; it is empty when the extension is
        wrong or processing fails. ``session_id`` is a fresh UUID string.

    Raises:
        ValueError: If *contents* does not consist of exactly two
            comma-separated parts (raised before any error handling starts).
    """
    content_type, content_string = contents.split(',')
    decoded = b64decode(content_string)
    session_id = str(uuid4())

    try:
        if filename[-4:].lower() != '.sdf':
            return [], 'The file has a wrong format.', session_id

        # Save the upload under a unique name so the supplier can read it.
        unique_fname = join('uploads', '%s.sdf' % session_id)
        try:
            with open(unique_fname, 'w') as fh:
                fh.write(decoded.decode('utf-8'))

            supplier = SDMolSupplier(unique_fname, removeHs=False)
            n_mols = len(supplier)
            mols = [m for m in supplier if m]
            # Drop the supplier before the file is deleted.
            del supplier
        finally:
            # Clean up even when parsing failed; a failed removal is not
            # critical and must not mask the real outcome.
            try:
                remove(unique_fname)
            except Exception as e:
                print(e)

        # Report successfully parsed molecules out of the total record count.
        return mols, 'Loaded %d/%d mols' % (len(mols), n_mols), session_id

    except Exception as e:
        print(e)
        return [], 'Error occured during processing of a file.', session_id
0
def file_to_mols(filepath):
    """Load molecules from a SMILES (.smi) or SD (.sd / .sdf) file.

    SMILES lines are converted with ``smi_to_mol`` through ``process_map``;
    SD files are read with ``SDMolSupplier``. Entries that come back falsy
    are dropped in both cases.

    Raises:
        Exception: If *filepath* has none of the supported extensions.
    """
    if filepath.endswith('.smi'):
        print('Converting SMILES to list of Mols')
        sys.stdout.flush()
        with open(filepath) as handle:
            smiles_list = [row.rstrip() for row in handle]

        converted = process_map(smi_to_mol,
                                smiles_list,
                                chunksize=100,
                                max_workers=a.worker)
        return [mol for mol in converted if mol]

    if filepath.endswith(('.sd', '.sdf')):
        return [mol for mol in SDMolSupplier(filepath) if mol]

    raise Exception('Invalid file: {}\n'.format(filepath) +
                    '.smi, .sd, or .sdf extension is expected')
Exemplo n.º 7
0
def get_random_mol() -> Mol:
    """Return a randomly chosen molecule from the processed SDF file.

    The index is drawn uniformly over all records of ``PROCESSED_SDF_PATH``;
    the chosen record is asserted to be truthy before it is returned.
    """
    supplier = SDMolSupplier(PROCESSED_SDF_PATH)
    last_index = len(supplier) - 1
    chosen = randint(0, last_index)
    assert supplier[chosen]

    return supplier[chosen]
Exemplo n.º 8
0
def rd_kit_morgan(dir_sdf="../data/sdf/"):
    """Return the Morgan bit-vector fingerprints of all SDF files in a folder.

    The directory is listed with ``ls`` and the first molecule of every
    listed file is fingerprinted (radius 2, 256 bits).

    Args:
        dir_sdf: Directory that is listed and from which the files are
            loaded.

    Returns:
        A numpy array built from the list of fingerprints, in listing order
        (empty when the directory has no entries).
    """
    bit_length = 256
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir_sdf).read().split()

    fingerprints = []
    for name in file_names:
        # Load from the directory that was listed, not a hard-coded path.
        mol = SDMolSupplier(os.path.join(dir_sdf, name))[0]
        fingerprints.append(
            AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=bit_length))

    return np.array(fingerprints)
Exemplo n.º 9
0
def rd_kit_rd(dir_sdf="../data/sdf/"):
    """Return RDKit-fingerprint Tanimoto similarities to the first SDF file.

    The directory is listed with ``ls``; the first listed file is the
    baseline. The first molecule of every listed file is fingerprinted with
    ``maxPath=2`` and compared with the baseline fingerprint.

    Args:
        dir_sdf: Directory that is listed and from which the files are
            loaded.

    Returns:
        A numpy array of similarities in listing order (empty when the
        directory has no entries).
    """
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir_sdf).read().split()
    if not file_names:
        # Nothing to compare; avoid indexing an empty listing.
        return np.array([])

    # Load from the directory that was listed, not a hard-coded path.
    baseline_mol = SDMolSupplier(os.path.join(dir_sdf, file_names[0]))[0]
    baseline_rdk = AllChem.RDKFingerprint(baseline_mol, maxPath=2)

    similarities = []
    for name in file_names:
        mol = SDMolSupplier(os.path.join(dir_sdf, name))[0]
        fp_rdk = AllChem.RDKFingerprint(mol, maxPath=2)
        similarities.append(
            DataStructs.FingerprintSimilarity(
                baseline_rdk, fp_rdk, metric=DataStructs.TanimotoSimilarity))

    return np.array(similarities)
Exemplo n.º 10
0
def rd_kit_aval(dir_sdf="../data/sdf/"):
    """Return the Avalon fingerprints (128 bits) of all SDF files in a folder.

    The directory is listed with ``ls`` and the first molecule of every
    listed file is fingerprinted.

    Args:
        dir_sdf: Directory that is listed and from which the files are
            loaded.

    Returns:
        A numpy array built from the list of fingerprints, in listing order
        (empty when the directory has no entries).
    """
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir_sdf).read().split()

    fingerprints = []
    for name in file_names:
        # Load from the directory that was listed, not a hard-coded path.
        mol = SDMolSupplier(os.path.join(dir_sdf, name))[0]
        fingerprints.append(pyAvalonTools.GetAvalonFP(mol, 128))

    return np.array(fingerprints)
Exemplo n.º 11
0
def test_MCS(sdf_file):
    """Run the nearest-neighbour and MCS search on the molecules of an SDF.

    Molecules that fail to parse are dropped. Neighbours are searched within
    the same set of molecules; the results are not returned.
    """
    supplier = SDMolSupplier(sdf_file, removeHs=False)
    valid_mols = np.array([mol for mol in supplier if mol])

    neighbour_ids = get_Tanimoto_NNs(
        valid_mols, valid_mols, 3, fps_nbits=512, order=1, nns=10)

    # Unpacked only to check that the call yields four results.
    _mcss, _mcs_matches, _nn_mols, _nn_mcs_matches = get_MCSs(
        valid_mols, valid_mols, nns_indices=neighbour_ids)
Exemplo n.º 12
0
def split(sdf, label_col, folder, splitfold=5):
    """
    Stratified splitting of an SDF dataset into k folds.

    For every fold a ``trainset_<fold>.sdf`` and a ``testset_<fold>.sdf`` file
    is written into the output folder.

    :param sdf: Path of the SDF file holding the input molecules
    :param label_col: Name of the molecule property used as stratification label
    :param folder: Folder/model name; when None, a folder named after the SDF
        file (the part of its name before the first '.') is created next to it
    :param splitfold: k number of folds
    :return: tuple of a dict with the lists 'train_files' and 'test_files',
        and the output folder
    """
    if folder is None:
        sdf_path = pathlib.Path(sdf)
        folder = sdf_path.parent.joinpath(sdf_path.name.partition('.')[0])
        folder.mkdir(exist_ok=True)
        folder = folder.absolute()
    else:
        pathlib.Path(folder).mkdir(exist_ok=True)

    mols = list(SDMolSupplier(sdf))
    labels = [mol.GetProp(label_col) for mol in mols]

    train_files = []
    test_files = []
    skf = StratifiedKFold(n_splits=splitfold)
    for fold, (train_ix, test_ix) in enumerate(skf.split(mols, labels)):
        train_set_fn = "{}/trainset_{}.sdf".format(folder, fold)
        test_set_fn = "{}/testset_{}.sdf".format(folder, fold)

        _write_sdf_subset(train_set_fn, mols, train_ix)
        train_files.append(train_set_fn)

        _write_sdf_subset(test_set_fn, mols, test_ix)
        test_files.append(test_set_fn)

    return {'train_files': train_files,
            'test_files': test_files}, folder


def _write_sdf_subset(path, mols, indices):
    """Write the molecules of *mols* selected by *indices* to the SDF *path*."""
    writer = SDWriter(path)
    try:
        for i in indices:
            writer.write(mols[i])
    finally:
        # Close even when a write fails so the file handle is not leaked.
        writer.close()
Exemplo n.º 13
0
def _parseMolData(data, sanitize=True, removeHs=True, strictParsing=True):
    """Parse SD data by writing it to a temporary file and reading it back.

    Args:
        data: Raw SD data; passed to ``os.write``, so it must be bytes-like.
        sanitize: Forwarded to ``SDMolSupplier``.
        removeHs: Forwarded to ``SDMolSupplier``.
        strictParsing: Forwarded to ``SDMolSupplier``.

    Returns:
        The list of molecules that parsed successfully (falsy entries are
        dropped).
    """
    fd, fpath = tempfile.mkstemp(text=True)
    try:
        try:
            os.write(fd, data)
        finally:
            # Close the descriptor even when the write fails.
            os.close(fd)
        suppl = SDMolSupplier(fpath,
                              sanitize=sanitize,
                              removeHs=removeHs,
                              strictParsing=strictParsing)
        return [x for x in suppl if x]
    finally:
        # Always delete the temporary file, including on parse errors.
        os.remove(fpath)
Exemplo n.º 14
0
def morgan(bit_length=256, dir="../data/sdf/DB3/", bit=True):
    """Compute Morgan fingerprints (radius 2) for SDF files with known data.

    File names and property records come from ``merge_dir_and_data``. Each
    record is split on ":" and read as ``name:value:value``. A file is kept
    only when its name without the last four characters equals the name field
    of the record at the same position, or of the record right after it.

    Args:
        bit_length: Number of bits of the bit vector (used when *bit* is
            truthy).
        dir: Directory prefix prepended verbatim to every file name.
        bit: Truthy to use ``GetMorganFingerprintAsBitVect``, falsy to use
            ``GetMorganFingerprint``.

    Returns:
        ``(names, fingerprints, homo, homo1, diff)`` where ``fingerprints``
        is a numpy array and the other entries are lists of equal length;
        ``diff`` holds ``homo - homo1`` per kept file.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    for tmp, item in enumerate(dir_fl_names):
        stem = item[0:-4]

        try:
            suppl = SDMolSupplier(dir + item)
        except Exception:
            # Best effort: unreadable files are skipped.
            continue

        try:
            mol = suppl[0]
            if bit:
                fp = AllChem.GetMorganFingerprintAsBitVect(
                    mol, 2, nBits=int(bit_length))
            else:
                fp = AllChem.GetMorganFingerprint(mol, 2)
        except Exception:
            if not bit:
                print("error")
            # Skip the file so a fingerprint left over from a previous
            # iteration is never stored under this file's name.
            continue

        try:
            fields = list_to_sort[tmp].split(":")
        except LookupError:
            # No property record at this position.
            continue
        first_name = fields[0]

        if fields[0] != stem:
            # Tolerate records shifted by one position.
            try:
                fields = list_to_sort[tmp + 1].split(":")
            except LookupError:
                print(first_name, stem)
                fields = None
            else:
                if fields[0] != stem:
                    fields = None

        if fields is not None:
            try:
                homo_temp = float(fields[1])
                homo1_temp = float(fields[2])
            except (LookupError, ValueError):
                print(first_name, stem)
            else:
                # Append only after both values parsed so that all result
                # lists stay aligned.
                fingerprints.append(fp)
                names.append(item)
                homo.append(homo_temp)
                homo1.append(homo1_temp)
                diff.append(homo_temp - homo1_temp)

        sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
        sys.stdout.flush()

    fingerprints = np.array(fingerprints)
    return names, fingerprints, homo, homo1, diff
Exemplo n.º 15
0
def layer(dir="../data/sdf/DB/"):
    """Return the layered fingerprints of all SDF files in a folder.

    The directory is listed with ``ls`` and the first molecule of every
    listed file is fingerprinted.

    Args:
        dir: Directory that is listed and from which the files are loaded.

    Returns:
        A numpy array built from the list of fingerprints, in listing order
        (empty when the directory has no entries).
    """
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir).read().split()

    fingerprints = []
    for item in file_names:
        # Load from the directory that was listed, not a hard-coded path.
        mol = SDMolSupplier(os.path.join(dir, item))[0]
        fingerprints.append(AllChem.LayeredFingerprint(mol))

    return np.array(fingerprints)
Exemplo n.º 16
0
def rdk(dir="../data/sdf/DB/"):
    """Return the RDKit fingerprints (maxPath=2) of all SDF files in a folder.

    The directory is listed with ``ls`` and the first molecule of every
    listed file is fingerprinted.

    Args:
        dir: Directory that is listed and from which the files are loaded.

    Returns:
        A numpy array built from the list of fingerprints, in listing order
        (empty when the directory has no entries).
    """
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir).read().split()

    fingerprints = []
    for item in file_names:
        # Load from the directory that was listed, not a hard-coded path.
        mol = SDMolSupplier(os.path.join(dir, item))[0]
        fingerprints.append(AllChem.RDKFingerprint(mol, maxPath=2))

    return np.array(fingerprints)
Exemplo n.º 17
0
def sdf_to_info(sdf_folder, save_folder):
    """Summarise the first molecule of every SDF file into ``info.csv``.

    For each ``*.sdf`` file in *sdf_folder* one row is recorded with the file
    name up to its first '.', the molecule's ``SMILES`` property and its
    number of heavy atoms. Rows are sorted by heavy-atom count and written to
    ``info.csv`` in *save_folder*.

    Args:
        sdf_folder: Folder that is searched for ``*.sdf`` files.
        save_folder: Existing folder that receives ``info.csv``.
    """
    columns = ["file_name", "SMILES", "n_heavy"]

    # Collect plain dicts and build the frame once; DataFrame.append copies
    # the whole frame per row and no longer exists in pandas 2.
    rows = []
    for sdf in glob(osp.join(sdf_folder, "*.sdf")):
        mol = SDMolSupplier(sdf)[0]
        rows.append({
            "file_name": osp.basename(sdf).split(".")[0],
            "SMILES": mol.GetProp("SMILES"),
            "n_heavy": mol.GetNumHeavyAtoms(),
        })

    # Explicit columns keep the sort valid when no SDF file was found.
    result = pd.DataFrame(rows, columns=columns).sort_values(by="n_heavy")
    result.to_csv(osp.join(save_folder, "info.csv"), index=False)
Exemplo n.º 18
0
def aval(dir="../data/sdf/DB/", bit_length=128):
    """Return the Avalon fingerprints of all SDF files in a folder.

    The directory is listed with ``ls`` and the first molecule of every
    listed file is fingerprinted.

    Args:
        dir: Directory that is listed and from which the files are loaded.
        bit_length: Second argument passed to ``GetAvalonFP``.

    Returns:
        A numpy array built from the list of fingerprints, in listing order
        (empty when the directory has no entries).
    """
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir).read().split()

    fingerprints = []
    for item in file_names:
        # Load from the directory that was listed, not a hard-coded path.
        mol = SDMolSupplier(os.path.join(dir, item))[0]
        fingerprints.append(pyAvalonTools.GetAvalonFP(mol, bit_length))

    return np.array(fingerprints)
Exemplo n.º 19
0
def rd_kit_morgan(dir_sdf="../data/sdf/"):
    """Return the layered fingerprints of all SDF files in a folder.

    Despite its name this function calls ``AllChem.LayeredFingerprint``. The
    directory is listed with ``ls`` and the first molecule of every listed
    file is fingerprinted.

    Args:
        dir_sdf: Directory that is listed and from which the files are
            loaded.

    Returns:
        A numpy array built from the list of fingerprints, in listing order
        (empty when the directory has no entries).
    """
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir_sdf).read().split()

    fingerprints = []
    for item in file_names:
        # Load from the directory that was listed, not a hard-coded path.
        mol = SDMolSupplier(os.path.join(dir_sdf, item))[0]
        fingerprints.append(AllChem.LayeredFingerprint(mol))

    # Convert once after the loop: converting inside the loop replaced the
    # list with an array, which has no append() for the next file.
    return np.array(fingerprints)
Exemplo n.º 20
0
def load_props(mols_dir):
    """Collect the properties of the first molecule of every SDF in a folder.

    Each ``*.sdf`` file contributes one row holding the molecule's property
    dict plus an ``id`` taken from the file name without ``.sdf``. The
    returned frame is restricted to ``molId``, the experimental / prediction /
    error columns of every predicted endpoint (sorted by endpoint name),
    ``Similarity_Tanimoto``, ``NN`` and ``id``.
    """
    records = []
    for sdf_path in glob(join(mols_dir, '*.sdf')):
        record = SDMolSupplier(sdf_path)[0].GetPropsAsDict()
        record['id'] = split(sdf_path)[-1].replace('.sdf', '')
        records.append(record)
    df = pd.DataFrame(records)

    # An endpoint is the part before the first '_' of a prediction column.
    endpoints = sorted(
        {col.split('_')[0] for col in df.columns if 'prediction' in col})

    ordered = ['molId']
    for endpoint in endpoints:
        ordered.extend(['%s_experimental' % endpoint,
                        '%s_prediction' % endpoint,
                        '%s_error' % endpoint])
    ordered.extend(['Similarity_Tanimoto', 'NN', 'id'])

    return df[ordered]
Exemplo n.º 21
0
def min_sdf():
    """Extract the lowest-energy conformer of every conformer SDF file.

    For each ``raw/openchem_logP_confs/*.sdf`` file the molecule with the
    smallest ``energy_abs`` property is written to
    ``raw/openchem_logP_mmff_sdfs/<id>.mmff.sdf``, where ``<id>`` is the file
    name up to its first '.' and then up to its first '_'. Any error while
    processing a file is printed and that file is skipped.
    """
    files = glob("raw/openchem_logP_confs/*.sdf")
    for f in tqdm(files):
        try:
            suppl = SDMolSupplier(f, removeHs=False)
            lowest_e = np.inf
            selected_mol = None
            for mol in suppl:
                energy = float(mol.GetProp("energy_abs"))
                if energy < lowest_e:
                    lowest_e = energy
                    selected_mol = mol

            if selected_mol is not None:
                mol_id = osp.basename(f).split('.')[0].split('_')[0]
                writer = SDWriter(f"raw/openchem_logP_mmff_sdfs/{mol_id}.mmff.sdf")
                try:
                    writer.write(selected_mol)
                finally:
                    # Close explicitly so the record is flushed to disk and
                    # the file handle is released.
                    writer.close()
        except Exception as e:
            print(e)
Exemplo n.º 22
0
def test_preprocess_mols(sdf_file, session_id):
    """Run ``preprocess_mols`` on an SDF file with a clean session folder.

    The ``uploads/<session_id>`` folder is removed before and after the run;
    removal errors are printed and otherwise ignored. Molecules that fail to
    parse are dropped.

    Returns the result of ``preprocess_mols``.
    """

    def _purge_session_dir():
        try:
            rmtree(join('uploads', session_id))
        except Exception as err:
            print(err)

    _purge_session_dir()

    supplier = SDMolSupplier(sdf_file, removeHs=False)
    valid_mols = np.array([mol for mol in supplier if mol])
    result = preprocess_mols(valid_mols, session_id)

    _purge_session_dir()

    return result
Exemplo n.º 23
0
def rdk(dir="../data/sdf/DB/"):
    """Compute RDKit fingerprints (maxPath=2) for SDF files with known data.

    File names and property records come from ``merge_dir_and_data``. Each
    record is split on ":" and read as ``name:value:value``. A file is kept
    only when its name without the last four characters equals the name field
    of the record at the same position, or of the record right after it.

    Args:
        dir: Directory prefix prepended verbatim to every file name.

    Returns:
        ``(names, fingerprints, homo, homo1, diff)`` where ``fingerprints``
        is a numpy array and the other entries are lists of equal length;
        ``diff`` holds ``homo - homo1`` per kept file.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    for tmp, item in enumerate(dir_fl_names):
        stem = item[0:-4]

        try:
            suppl = SDMolSupplier(dir + item)
            fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=2)
        except Exception:
            # Best effort: unreadable files are skipped.
            continue

        try:
            fields = list_to_sort[tmp].split(":")
        except LookupError:
            # No property record at this position.
            continue
        first_name = fields[0]

        if fields[0] != stem:
            # Tolerate records shifted by one position.
            try:
                fields = list_to_sort[tmp + 1].split(":")
            except LookupError:
                print(first_name, stem)
                fields = None
            else:
                if fields[0] != stem:
                    fields = None

        if fields is not None:
            try:
                homo_temp = float(fields[1])
                homo1_temp = float(fields[2])
            except (LookupError, ValueError):
                print(first_name, stem)
            else:
                # Append only after both values parsed so that all result
                # lists stay aligned.
                fingerprints.append(fp_rdk)
                names.append(item)
                homo.append(homo_temp)
                homo1.append(homo1_temp)
                diff.append(homo_temp - homo1_temp)

        sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
        sys.stdout.flush()

    fingerprints = np.array(fingerprints)
    return names, fingerprints, homo, homo1, diff
Exemplo n.º 24
0
def check_sdf_matches(dg,
                      sdf_file,
                      draw_structures=True,
                      print_unmatching=False):
    """
    After generating the network, try to see if any structures match with
    those in an SDF file. These files were usually created manually, storing
    structures reported in experimental studies. The purpose is to match our
    simulations with experiments.

    Keyword arguments:
    dg               -- the derivation graph of the network
    sdf_file         -- path to the SDF file
    draw_structures  -- whether or not to print the matching structures in
                        the summary pdf
    print_unmatching -- whether or not to print the structures without a
                        match in a section of their own
    """
    matching_structs = []
    not_matching = []
    postSection('Matching Structures')
    print(f"Checking for matches with structures in {sdf_file}")
    sdfile = SDMolSupplier(sdf_file)
    for mol in sdfile:
        Kekulize(mol)
        smi = MolToSmiles(mol, kekuleSmiles=True)
        mol_graph = smiles(smi, add=False)
        for v in dg.vertices:
            if v.graph.isomorphism(mol_graph) == 1:
                matching_structs.append(mol_graph)
                print(
                    f"Structure {v.graph.smiles} in the network matches a test set molecule!"
                )
        if mol_graph not in matching_structs:
            not_matching.append(mol_graph)
    if draw_structures:
        for g in matching_structs:
            g.print(p)
    if print_unmatching:
        postSection("Structures not matched yet")
        for g in not_matching:
            g.print(p)

    total = len(sdfile)
    # Guard the percentage against an SDF file without any records.
    percent = 100 * len(matching_structs) / total if total else 0.0
    print(
        f"{len(matching_structs)} of {total} ({percent}%)  total structures in the SDF are in the reaction network."
    )
Exemplo n.º 25
0
def morgan(bit_length=256, dir="../data/sdf/DB/", bit=True):
    """Return the Morgan fingerprints (radius 2) of all SDF files in a folder.

    The directory is listed with ``ls`` and the first molecule of every
    listed file is fingerprinted.

    Args:
        bit_length: Number of bits of the bit vector (used when *bit* is
            truthy).
        dir: Directory that is listed and from which the files are loaded.
        bit: Truthy to use ``GetMorganFingerprintAsBitVect``, falsy to use
            ``GetMorganFingerprint``.

    Returns:
        A numpy array built from the list of fingerprints, in listing order
        (empty when the directory has no entries).
    """
    # The listing is whitespace-split, so file names containing whitespace
    # are not supported.
    file_names = os.popen("ls " + dir).read().split()

    fingerprints = []
    for item in file_names:
        # Load from the directory that was listed, not a hard-coded path.
        mol = SDMolSupplier(os.path.join(dir, item))[0]
        if bit:
            fingerprints.append(
                AllChem.GetMorganFingerprintAsBitVect(mol,
                                                      2,
                                                      nBits=bit_length))
        else:
            fingerprints.append(AllChem.GetMorganFingerprint(mol, 2))

    return np.array(fingerprints)
Exemplo n.º 26
0
def aval(dir="../data/sdf/DB/", bit_length=256):
    """Compute Avalon fingerprints for SDF files with known data.

    File names and property records come from ``merge_dir_and_data``. Each
    record is split on ":" and read as ``name:value:value``. A file is kept
    only when its name without the last four characters equals the name field
    of the record at the same position, or of the record right after it.

    Args:
        dir: Directory prefix prepended verbatim to every file name.
        bit_length: Second argument passed to ``GetAvalonFP``.

    Returns:
        ``(names, fingerprints, homo, homo1, diff)`` where ``fingerprints``
        is a numpy array and the other entries are lists of equal length;
        ``diff`` holds ``homo - homo1`` per kept file.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    for tmp, item in enumerate(dir_fl_names):
        stem = item[0:-4]

        try:
            suppl = SDMolSupplier(dir + item)
            fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], bit_length)
        except Exception:
            # Best effort: unreadable files are skipped.
            continue

        try:
            fields = list_to_sort[tmp].split(":")
        except LookupError:
            # No property record at this position.
            continue
        first_name = fields[0]

        if fields[0] != stem:
            # Tolerate records shifted by one position.
            try:
                fields = list_to_sort[tmp + 1].split(":")
            except LookupError:
                print(first_name, stem)
                fields = None
            else:
                if fields[0] != stem:
                    fields = None

        if fields is not None:
            try:
                homo_temp = float(fields[1])
                homo1_temp = float(fields[2])
            except (LookupError, ValueError):
                print(first_name, stem)
            else:
                # Append only after both values parsed so that all result
                # lists stay aligned.
                fingerprints.append(fp_aval)
                names.append(item)
                homo.append(homo_temp)
                homo1.append(homo1_temp)
                diff.append(homo_temp - homo1_temp)

        sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
        sys.stdout.flush()

    # Convert the collected Avalon fingerprints (the original converted an
    # unrelated name, ``layer``, instead).
    fingerprints = np.array(fingerprints)
    return names, fingerprints, homo, homo1, diff
Exemplo n.º 27
0
def time_fps(sdf_file, png_file, radius):
    """Plot the ``speed_tests`` correlation against fingerprint size.

    Molecules that fail to parse are dropped. Fingerprint sizes 2**5 to
    2**11 are passed to ``speed_tests``; the returned correlations are drawn
    as a line plot, each point annotated with its timing, and the figure is
    saved to *png_file*.
    """
    supplier = SDMolSupplier(sdf_file, removeHs=False)
    valid_mols = [mol for mol in supplier if mol]

    exponents = np.arange(5, 12)
    corrs, timings = speed_tests(valid_mols, radius, 2**exponents)

    sns.set(font_scale=1.5)
    fig, axis = plt.subplots(1, 1, figsize=(7, 5), dpi=300)
    axis.plot(exponents, corrs, '-o')

    axis.set_xlabel('size of fingerprint ($log_2$ scale)')
    axis.set_ylabel('Correlation')
    axis.set_xticks(exponents)
    tick_labels = []
    for exponent in exponents:
        tick_labels.append('%d' % 2**exponent)
    axis.set_xticklabels(tick_labels)

    # Place each timing slightly below its data point.
    for position, elapsed in enumerate(timings):
        anchor = (exponents[position], corrs[position] - 0.05)
        axis.annotate('%.1fs' % elapsed, anchor, fontsize='small')

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.2, right=0.95)
    fig.savefig(png_file)
Exemplo n.º 28
0
def docking(k):
    """Dock the ligand of complex *k* into its protein with smina.

    The ligand is read from the PDBbind SDF file, falling back to the RCSB
    SDF file (when it exists) and then to the PDBbind mol2 file. Twenty
    conformers are embedded and UFF-optimised, the lowest-energy one is
    written as PDB, ligand and protein are converted to pdbqt with obabel,
    and smina is run with an autobox around the original ligand.

    Nothing is done when the output file already exists, when no ligand can
    be read, or when no conformer could be embedded.

    Args:
        k: Identifier of the complex; used as folder and file name stem.
    """
    mol_id = k
    protein = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_protein.pdb")
    protein_pdbqt = os.path.join(pdbbind_dir, mol_id,
                                 f"{mol_id}_protein.pdbqt")
    ligand = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.sdf")
    ligand_mol2 = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.mol2")
    log_name = os.path.join(log_dir, f'{mol_id}.log')
    out_name = os.path.join(out_pdbqt_dir, f'{mol_id}_out.pdbqt')
    pdb_name = os.path.join(pdb_dir, f'{mol_id}.pdb')
    pdbqt_name = os.path.join(pdbqt_dir, f'{mol_id}.pdbqt')
    ligand_rcsb = os.path.join(rcsb_dir, mol_id, f"{mol_id}.sdf")

    # Already docked.
    if os.path.exists(out_name):
        return

    # Read the ligand, trying the alternative sources in turn.
    m = SDMolSupplier(ligand)[0]
    if m is None and os.path.exists(ligand_rcsb):
        m = SDMolSupplier(ligand_rcsb)[0]

    if m is None:
        m = Chem.MolFromMol2File(ligand_mol2)

    if m is None:
        return

    Chem.SanitizeMol(m)

    # Adding hydrogen atoms to molecule
    m = Chem.AddHs(m)
    cids = AllChem.EmbedMultipleConfs(
        m,
        numConfs=20,
    )
    if len(cids) == 0:
        # Embedding produced no conformer; there is nothing to rank or dock.
        return

    cenergy = []
    for conf in cids:
        # Optimise in place; the convergence flag is not used.
        AllChem.UFFOptimizeMolecule(m, confId=conf)
        cenergy.append(
            AllChem.UFFGetMoleculeForceField(m, confId=conf).CalcEnergy())
    min_idx = cenergy.index(min(cenergy))

    m = Chem.RemoveHs(m)
    w = PDBWriter(pdb_name)
    w.write(m, min_idx)
    w.close()

    # pdb to pdbqt (both of ligand and protein)
    if not os.path.exists(pdbqt_name):
        os.system(f'obabel {pdb_name} -O {pdbqt_name}')
    if not os.path.exists(protein_pdbqt):
        os.system(f'obabel {protein} -O {protein_pdbqt}')

    command = f"smina \
            -r {protein_pdbqt} \
            -l {pdbqt_name} \
            --autobox_ligand {ligand} \
            --autobox_add 8 \
            --exhaustiveness 8 \
            --log {log_name} \
            -o {out_name} \
            --cpu 1 \
            --num_modes 100 \
            --seed 0"

    os.system(command)
Exemplo n.º 29
0
def _parseMolData(data):
    """Return the molecules that parse from the SD data given as *data*.

    *data* is converted with ``str`` before parsing; entries that come back
    falsy are dropped.
    """
    supplier = SDMolSupplier()
    supplier.SetData(str(data))
    parsed = []
    for mol in supplier:
        if mol:
            parsed.append(mol)
    return parsed
from sys import argv

from rdkit.Chem import SDMolSupplier, SmilesWriter

# Convert the SD file given as first argument into a SMILES file written to
# the path given as second argument.
sdm = SDMolSupplier(argv[1])
sw = SmilesWriter(argv[2], includeHeader=False, nameHeader='_Name')
try:
    for mol in sdm:
        # Records that could not be parsed come back as None; skip them
        # instead of handing None to the writer.
        if mol is not None:
            sw.write(mol)
finally:
    # Close even when a write fails so buffered output reaches the file.
    sw.close()