Python SDMolSupplier示例，rdkit.Chem.SDMolSupplier Python示例

示例#1

0

显示文件

def _parseMolData(data):
    """Parse SD data and regenerate 2D coordinates for molecules that lack them.

    The data is loaded unsanitized. Every molecule that does not carry the
    ``_drawingBondsWedged`` property is sanitized. The molecule is then written
    back to a mol block and every line containing ``0.0000`` is inspected: when
    each such line contains that token at least three times (which is also
    true when no line contains it at all), the molecule is sanitized and RDKit
    computes fresh 2D coordinates. Otherwise the molecule is kept as drawn.

    Returns the list of successfully parsed molecules.
    """
    supplier = SDMolSupplier()
    supplier.SetData(str(data), sanitize=False)

    parsed = [mol for mol in supplier if mol]
    for mol in parsed:
        if not mol.HasProp("_drawingBondsWedged"):
            SanitizeMol(mol)

        # Number of "0.0000" tokens on each mol block line that has any.
        zero_counts = [
            row.count("0.0000")
            for row in MolToMolBlock(mol).split("\n")
            if "0.0000" in row
        ]
        # Three or more tokens on every such line means all coordinates are zero.
        if all(count >= 3 for count in zero_counts):
            SanitizeMol(mol)
            Compute2DCoords(mol)
    return parsed

示例#2

0

显示文件

 def parse_molblock(self, mb):
     """Return the first molecule parsed from the molblock *mb*, or None if it is falsy."""
     supplier = SDMolSupplier()
     supplier.SetData(mb)
     first = next(supplier)
     return first if first else None

示例#3

0

显示文件

def rd_kit(dir_sdf = "../data/sdf/"):
    """Plot histograms of fingerprint similarity to the first file in *dir_sdf*.

    The first record of every file in *dir_sdf* is fingerprinted (Morgan bit
    vector, RDKit, Avalon and layered) and compared, with Tanimoto similarity,
    against the fingerprints of the first record of the first file (the
    baseline). The Morgan and RDKit similarity histograms are shown, and the
    mean RDKit similarity and the number of files are printed.

    :param dir_sdf: directory containing the SDF files
    :raises IndexError: if *dir_sdf* contains no (non-hidden) entries
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir_sdf, and names containing whitespace stay intact.
    # Hidden entries are skipped and names are sorted, as `ls` does (exact
    # order may differ from a locale-aware `ls`).
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith("."))
    bit_length = 1024

    sim_matrix_morgan = []
    sim_matrix_rdk = []
    sim_matrix_aval = []
    sim_matrix_layer = []

    baseline_mol = SDMolSupplier(os.path.join(dir_sdf, file_names[0]))[0]

    # NOTE(review): the baseline uses radius/maxPath 2 while the per-file
    # fingerprints below use 3, so the baseline file does not score 1.0
    # against itself. Left as in the original - confirm this is intended.
    baseline_morgan = AllChem.GetMorganFingerprintAsBitVect(baseline_mol, 2, nBits=bit_length)
    baseline_rdk = AllChem.RDKFingerprint(baseline_mol, maxPath=2)
    baseline_aval = pyAvalonTools.GetAvalonFP(baseline_mol, 128)
    baseline_layer = AllChem.LayeredFingerprint(baseline_mol)

    count = 0
    for name in file_names:
        # Parse the first record once and reuse it for every fingerprint.
        mol = SDMolSupplier(os.path.join(dir_sdf, name))[0]
        count += 1

        fp_bit = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=bit_length)
        fp_rdk = AllChem.RDKFingerprint(mol, maxPath=3)
        fp_aval = pyAvalonTools.GetAvalonFP(mol, 128)
        fp_layer = AllChem.LayeredFingerprint(mol)

        sim_matrix_morgan.append(
            DataStructs.FingerprintSimilarity(baseline_morgan, fp_bit, metric=DataStructs.TanimotoSimilarity))
        sim_matrix_rdk.append(
            DataStructs.FingerprintSimilarity(baseline_rdk, fp_rdk, metric=DataStructs.TanimotoSimilarity))
        sim_matrix_aval.append(
            DataStructs.FingerprintSimilarity(baseline_aval, fp_aval, metric=DataStructs.TanimotoSimilarity))
        sim_matrix_layer.append(
            DataStructs.FingerprintSimilarity(baseline_layer, fp_layer, metric=DataStructs.TanimotoSimilarity))

    sim_matrix_morgan = np.array(sim_matrix_morgan)
    sim_matrix_rdk = np.array(sim_matrix_rdk)
    sim_matrix_aval = np.array(sim_matrix_aval)
    sim_matrix_layer = np.array(sim_matrix_layer)

    label_morgan = "morgan" + str(bit_length)
    plt.hist(sim_matrix_morgan, label = label_morgan)
    plt.hist(sim_matrix_rdk, label = "rdk2")
    print(np.mean(sim_matrix_rdk))
    print(count)
    plt.xlabel("Similarity to Baseline")
    plt.ylabel("Counts")
    plt.title("Different Fingerprinting Methods, Similarity to Baseline")
    plt.legend()
    plt.show()

示例#4

0

显示文件

文件： utils.py 项目： MikolajMizera/SelVie

def MCS_NN_search(sdf_file):
    """Draw a molecule and its nearest neighbour with MCS atoms highlighted.

    The first record of *sdf_file* is the query. Its ``NN`` property is
    evaluated as a Python literal listing candidate ids, and ``<id>.sdf`` is
    loaded for each id from the directory that contains *sdf_file*. The query
    and candidates are passed to ``get_MCSs``.

    Returns a tuple of two ``draw_base64`` results: the query image and the
    image of the first neighbour molecule returned by ``get_MCSs``.
    """
    session_dir = split(sdf_file)[0]
    query = SDMolSupplier(sdf_file, removeHs=True)[0]
    neighbour_ids = literal_eval(query.GetPropsAsDict()['NN'])

    neighbours = []
    for neighbour_id in neighbour_ids:
        neighbour_path = join(session_dir, '%d.sdf'%neighbour_id)
        neighbours.append(SDMolSupplier(neighbour_path, removeHs=True)[0])

    _mcss, query_matches, nn_mols, nn_matches = get_MCSs([query], neighbours)
    query_img = draw_base64(query, highlightAtoms=query_matches[0])
    neighbour_img = draw_base64(nn_mols[0], highlightAtoms=nn_matches[0])
    return query_img, neighbour_img

示例#5

0

显示文件

文件： utils.py 项目： MikolajMizera/SelVie

def parse_sdf(contents, filename):
    """Decode an uploaded file and try to parse it as an SDF.

    *contents* is split on ``','`` into a content type and a base64 payload.
    The payload is decoded, written to ``uploads/<session_id>.sdf`` and read
    with ``SDMolSupplier`` (hydrogens kept). The temporary file is removed
    afterwards, whether or not parsing succeeded.

    Returns a tuple ``(mols, message, session_id)``: the successfully parsed
    molecules, a status message and a freshly generated session id. On a wrong
    file extension or any processing error, ``mols`` is an empty list and the
    message describes the failure.
    """
    content_type, content_string = contents.split(',')
    decoded = b64decode(content_string)
    session_id = str(uuid4())

    try:
        if filename[-4:].lower() != '.sdf':
            return [], 'The file has a wrong format.', session_id

        # Save the contents under a unique name so the supplier can read them.
        unique_fname = join('uploads', '%s.sdf'%session_id)
        with open(unique_fname, 'w') as fh:
            fh.write(decoded.decode('utf-8'))

        supplier = None
        try:
            supplier = SDMolSupplier(unique_fname, removeHs=False)
            n_mols = len(supplier)
            mols = [m for m in supplier if m]
        finally:
            # Drop the supplier first so it no longer holds the file.
            supplier = None
            try:
                remove(unique_fname)
            except Exception as e:
                # This is not critical
                print(e)

        # Report "successfully parsed / total records" (was swapped before).
        return mols, 'Loaded %d/%d mols'%(len(mols), n_mols), session_id

    except Exception as e:
        print(e)
        return [], 'Error occured during processing of a file.', session_id

示例#6

0

显示文件

def file_to_mols(filepath):
    """Load molecules from a ``.smi``, ``.sd`` or ``.sdf`` file.

    SD files are read with ``SDMolSupplier``. SMILES files are read line by
    line and converted with ``smi_to_mol`` through ``process_map``. In both
    cases falsy (unparsed) entries are dropped.

    Raises ``Exception`` when *filepath* has none of the supported extensions.
    """
    if filepath.endswith(('.sd', '.sdf')):
        return [entry for entry in SDMolSupplier(filepath) if entry]

    if not filepath.endswith('.smi'):
        raise Exception('Invalid file: {}\n'.format(filepath) +
                        '.smi, .sd, or .sdf extension is expected')

    print('Converting SMILES to list of Mols')
    sys.stdout.flush()
    with open(filepath) as handle:
        smiles_list = [row.rstrip() for row in handle]

    converted = process_map(smi_to_mol,
                            smiles_list,
                            chunksize=100,
                            max_workers=a.worker)
    return [entry for entry in converted if entry]

示例#7

0

显示文件

文件： get_random_mol.py 项目： xduan7/biochem-graph

def get_random_mol() -> Mol:
    """Return a randomly chosen molecule from the SDF at ``PROCESSED_SDF_PATH``.

    The index is drawn uniformly over all records; an assertion checks that the
    chosen record is truthy before it is returned.
    """
    supplier = SDMolSupplier(PROCESSED_SDF_PATH)
    last_index = len(supplier) - 1
    picked = randint(0, last_index)
    assert supplier[picked]

    return supplier[picked]

示例#8

0

显示文件

文件： descriptor_test.py 项目： santi921/ML_CO2

def rd_kit_morgan(dir_sdf = "../data/sdf/"):
    """Return Morgan bit-vector fingerprints for the files in *dir_sdf*.

    The first record of every (non-hidden) entry in *dir_sdf* is fingerprinted
    with radius 2 and 256 bits.

    :param dir_sdf: directory containing the SDF files
    :return: numpy array built from the list of fingerprints (empty for an
        empty directory)
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir_sdf, and names containing whitespace stay intact.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith("."))
    bit_length = 256

    fingerprints = []
    for name in file_names:
        # Read from dir_sdf itself; the path used to be hard-coded to
        # "../data/sdf/" regardless of the argument.
        mol = SDMolSupplier(os.path.join(dir_sdf, name))[0]
        fingerprints.append(
            AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=bit_length))

    return np.array(fingerprints)

示例#9

0

显示文件

文件： descriptor_test.py 项目： santi921/ML_CO2

def rd_kit_rd(dir_sdf = "../data/sdf/"):
    """Return RDKit-fingerprint Tanimoto similarities to the first file.

    The first record of the first (sorted, non-hidden) entry in *dir_sdf* is
    the baseline. The first record of every entry is fingerprinted with
    ``maxPath=2`` and compared against it.

    :param dir_sdf: directory containing the SDF files
    :return: numpy array of similarities, one per file (empty for an empty
        directory)
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir_sdf, and names containing whitespace stay intact.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith("."))
    if not file_names:
        # Nothing to compare; used to fail with IndexError on the baseline.
        return np.array([])

    # Read from dir_sdf itself; the path used to be hard-coded to
    # "../data/sdf/" regardless of the argument.
    baseline = SDMolSupplier(os.path.join(dir_sdf, file_names[0]))
    baseline_rdk = AllChem.RDKFingerprint(baseline[0], maxPath=2)

    similarities = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=2)
        similarities.append(
            DataStructs.FingerprintSimilarity(
                baseline_rdk, fp_rdk, metric=DataStructs.TanimotoSimilarity))

    return np.array(similarities)

示例#10

0

显示文件

文件： descriptor_test.py 项目： santi921/ML_CO2

def rd_kit_aval(dir_sdf = "../data/sdf/"):
    """Return 128-bit Avalon fingerprints for the files in *dir_sdf*.

    The first record of every (non-hidden) entry in *dir_sdf* is fingerprinted.

    :param dir_sdf: directory containing the SDF files
    :return: numpy array built from the list of fingerprints (empty for an
        empty directory)
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir_sdf, and names containing whitespace stay intact.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith("."))

    fingerprints = []
    for name in file_names:
        # Read from dir_sdf itself; the path used to be hard-coded to
        # "../data/sdf/" regardless of the argument.
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fingerprints.append(pyAvalonTools.GetAvalonFP(suppl[0], 128))

    return np.array(fingerprints)

示例#11

0

显示文件

def test_MCS(sdf_file):
    """Run the nearest-neighbour and MCS pipeline on every molecule of *sdf_file*.

    Truthy records are loaded with hydrogens kept, their neighbours are
    looked up with ``get_Tanimoto_NNs`` and the result is passed to
    ``get_MCSs``. Nothing is returned.
    """
    supplier = SDMolSupplier(sdf_file, removeHs=False)
    parsed = np.array([mol for mol in supplier if mol])
    neighbour_ids = get_Tanimoto_NNs(parsed, parsed, 3,
                                     fps_nbits=512, order=1, nns=10)

    # Unpacked to check that four values come back; the values are unused.
    _mcss, _matches, _nn_mols, _nn_matches = get_MCSs(
        parsed, parsed, nns_indices=neighbour_ids)

示例#12

0

显示文件

def split(sdf, label_col, folder, splitfold=5):
    """
    Stratified splitting of dataset into k-folds
    :param sdf: Path of the SDF file holding the input molecules
    :param label_col: Name of the molecule property used as stratification label
    :param folder: Output folder; if None, a folder named after the SDF file is
        created next to it
    :param splitfold: k number of folds
    :return: Tuple of a dict with the 'train_files' and 'test_files' path lists
        and the output folder
    """

    if folder is None:
        sdf_path = pathlib.Path(sdf)
        sdf_name = sdf_path.name.partition('.')[0]

        folder = sdf_path.parent.joinpath(sdf_name)
        folder.mkdir(parents=True, exist_ok=True)
        folder = folder.absolute()
    else:
        pathlib.Path(folder).mkdir(parents=True, exist_ok=True)

    train_files = []
    test_files = []

    # The supplier yields None for records it cannot parse; skip them so that
    # reading the label below does not fail on None.
    mols = [mol for mol in SDMolSupplier(sdf) if mol is not None]
    labels = [mol.GetProp(label_col) for mol in mols]

    skf = StratifiedKFold(n_splits=splitfold)
    for fold, (train_ix, test_ix) in enumerate(skf.split(mols, labels)):
        test_set_fn = "{}/testset_{}.sdf".format(folder, fold)
        train_set_fn = "{}/trainset_{}.sdf".format(folder, fold)

        for file_name, indices, collected in (
                (train_set_fn, train_ix, train_files),
                (test_set_fn, test_ix, test_files)):
            writer = SDWriter(file_name)
            try:
                for i in indices:
                    writer.write(mols[i])
            finally:
                # Always close, so the file is flushed even if a write fails.
                writer.close()
            collected.append(file_name)

    return {'train_files': train_files,
            'test_files': test_files}, folder

示例#13

0

显示文件

文件： io.py 项目： bkbonde/chembl_beaker

def _parseMolData(data, sanitize=True, removeHs=True, strictParsing=True):
    """Parse SD data by writing it to a temporary file and reading it back.

    :param data: SD file contents (bytes, or text that is encoded as UTF-8)
    :param sanitize: forwarded to ``SDMolSupplier``
    :param removeHs: forwarded to ``SDMolSupplier``
    :param strictParsing: forwarded to ``SDMolSupplier``
    :return: list of the truthy (successfully parsed) molecules
    """
    # Binary file writes need bytes; encode text input first.
    if not isinstance(data, bytes):
        data = data.encode('utf-8')

    fd, fpath = tempfile.mkstemp(text=True)
    try:
        # A buffered file object writes the whole payload (a bare os.write may
        # write only part of it) and closes the descriptor on exit.
        with os.fdopen(fd, 'wb') as handle:
            handle.write(data)
        suppl = SDMolSupplier(fpath,
                              sanitize=sanitize,
                              removeHs=removeHs,
                              strictParsing=strictParsing)
        return [x for x in suppl if x]
    finally:
        # Remove the temporary file even when writing or parsing fails.
        os.remove(fpath)

示例#14

0

显示文件

文件： helpers.py 项目： santi921/ML_CO2

def morgan(bit_length=256, dir="../data/sdf/DB3/", bit=True):
    """Compute Morgan fingerprints for the files paired with HOMO data.

    ``merge_dir_and_data(dir=dir)`` supplies the file names and a list of
    ``name:homo:homo1`` records. A file is kept when its name without the last
    four characters equals the name of the record at the same index, or at the
    next index. Files that cannot be read or fingerprinted are skipped.

    :param bit_length: number of bits when *bit* is true
    :param dir: directory prefix prepended to every file name
    :param bit: bit-vector fingerprint if true, otherwise the non-bit-vector
        Morgan fingerprint
    :return: tuple ``(names, fingerprints, homo, homo1, diff)`` where
        fingerprints is a numpy array and diff is ``homo - homo1`` per file
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    total = len(dir_fl_names)
    for tmp, item in enumerate(dir_fl_names):
        try:
            mol = SDMolSupplier(dir + item)[0]
            if bit:
                fp = AllChem.GetMorganFingerprintAsBitVect(
                    mol, 2, nBits=int(bit_length))
            else:
                fp = AllChem.GetMorganFingerprint(mol, 2)
        except Exception:
            # Skip this file; previously the fingerprint of the previous
            # molecule was silently reused under this file's name.
            print("error")
            continue

        stem = item[0:-4]
        values = None
        try:
            # The record list may be shifted by one against the file list.
            for offset in (0, 1):
                fields = list_to_sort[tmp + offset].split(":")
                if fields[0] == stem:
                    values = (float(fields[1]), float(fields[2]))
                    break
        except (IndexError, ValueError):
            expected = (list_to_sort[tmp].split(":")[0]
                        if tmp < len(list_to_sort) else None)
            print(expected, stem)

        if values is not None:
            # Append to all lists together so they stay index-aligned.
            homo_temp, homo1_temp = values
            fingerprints.append(fp)
            names.append(item)
            homo.append(homo_temp)
            homo1.append(homo1_temp)
            diff.append(homo_temp - homo1_temp)

        sys.stdout.write("\r %s /" % tmp + str(total))
        sys.stdout.flush()

    return names, np.array(fingerprints), homo, homo1, diff

示例#15

0

显示文件

文件： helpers.py 项目： dbim-chem/ML_CO2

def layer(dir="../data/sdf/DB/"):
    """Return layered fingerprints for the files in *dir*.

    The first record of every (non-hidden) entry in *dir* is fingerprinted.

    :param dir: directory containing the SDF files
    :return: numpy array built from the list of fingerprints (empty for an
        empty directory)
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir, and names containing whitespace stay intact.
    file_names = sorted(
        name for name in os.listdir(dir) if not name.startswith("."))

    fingerprints = []
    for name in file_names:
        # Read from dir itself; the path used to be hard-coded to
        # "../data/sdf/DB/" regardless of the argument.
        suppl = SDMolSupplier(os.path.join(dir, name))
        fingerprints.append(AllChem.LayeredFingerprint(suppl[0]))

    return np.array(fingerprints)

示例#16

0

显示文件

文件： helpers.py 项目： dbim-chem/ML_CO2

def rdk(dir="../data/sdf/DB/"):
    """Return RDKit fingerprints (``maxPath=2``) for the files in *dir*.

    The first record of every (non-hidden) entry in *dir* is fingerprinted.

    :param dir: directory containing the SDF files
    :return: numpy array built from the list of fingerprints (empty for an
        empty directory)
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir, and names containing whitespace stay intact.
    file_names = sorted(
        name for name in os.listdir(dir) if not name.startswith("."))

    fingerprints = []
    for name in file_names:
        # Read from dir itself; the path used to be hard-coded to
        # "../data/sdf/DB/" regardless of the argument.
        suppl = SDMolSupplier(os.path.join(dir, name))
        fingerprints.append(AllChem.RDKFingerprint(suppl[0], maxPath=2))

    return np.array(fingerprints)

示例#17

0

显示文件

def sdf_to_info(sdf_folder, save_folder):
    """Write ``info.csv`` summarising the first molecule of every SDF file.

    For each ``*.sdf`` file in *sdf_folder* the file name up to its first dot,
    the ``SMILES`` property and the heavy-atom count of the first record are
    collected. Rows are sorted by heavy-atom count and written, without the
    index, to ``info.csv`` in *save_folder*. A folder without SDF files
    produces a header-only CSV.

    :param sdf_folder: directory searched for ``*.sdf`` files
    :param save_folder: directory that receives ``info.csv``
    """
    columns = ["file_name", "SMILES", "n_heavy"]
    rows = []
    for sdf in glob(osp.join(sdf_folder, "*.sdf")):
        # Only the first record is needed, so index it directly.
        mol = SDMolSupplier(sdf)[0]
        rows.append({
            "file_name": osp.basename(sdf).split(".")[0],
            "SMILES": mol.GetProp("SMILES"),
            "n_heavy": mol.GetNumHeavyAtoms(),
        })

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call. Explicit columns keep the sort
    # below valid when no file was found.
    result = pd.DataFrame(rows, columns=columns)
    result = result.sort_values(by="n_heavy")
    result.to_csv(osp.join(save_folder, "info.csv"), index=False)

示例#18

0

显示文件

文件： helpers.py 项目： dbim-chem/ML_CO2

def aval(dir="../data/sdf/DB/", bit_length=128):
    """Return Avalon fingerprints for the files in *dir*.

    The first record of every (non-hidden) entry in *dir* is fingerprinted.

    :param dir: directory containing the SDF files
    :param bit_length: second argument passed to ``pyAvalonTools.GetAvalonFP``
    :return: numpy array built from the list of fingerprints (empty for an
        empty directory)
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir, and names containing whitespace stay intact.
    file_names = sorted(
        name for name in os.listdir(dir) if not name.startswith("."))

    fingerprints = []
    for name in file_names:
        # Read from dir itself; the path used to be hard-coded to
        # "../data/sdf/DB/" regardless of the argument.
        suppl = SDMolSupplier(os.path.join(dir, name))
        fingerprints.append(pyAvalonTools.GetAvalonFP(suppl[0], bit_length))

    return np.array(fingerprints)

示例#19

0

显示文件

文件： descriptor_test.py 项目： santi921/ML_CO2

def rd_kit_morgan(dir_sdf = "../data/sdf/"):
    """Return layered fingerprints for the files in *dir_sdf*.

    Despite the function name, the fingerprint computed here is
    ``AllChem.LayeredFingerprint`` of the first record of every (non-hidden)
    entry in *dir_sdf*.

    :param dir_sdf: directory containing the SDF files
    :return: numpy array built from the list of fingerprints (empty for an
        empty directory)
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir_sdf, and names containing whitespace stay intact.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith("."))

    fingerprints = []
    for name in file_names:
        # Read from dir_sdf itself; the path used to be hard-coded to
        # "../data/sdf/" regardless of the argument.
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fingerprints.append(AllChem.LayeredFingerprint(suppl[0]))

    # Convert once after the loop. Converting inside the loop turned the list
    # into an ndarray, so the second iteration's append() failed.
    return np.array(fingerprints)

示例#20

0

显示文件

文件： utils.py 项目： MikolajMizera/SelVie

def load_props(mols_dir):
    """Collect the properties of the first record of every SDF in *mols_dir*.

    Each ``*.sdf`` file contributes one row holding its property dict plus an
    ``id`` column (the file name with ``.sdf`` removed). The returned frame is
    limited to ``molId``, then the experimental/prediction/error columns of
    every prefix that has a prediction column (prefixes sorted), then
    ``Similarity_Tanimoto``, ``NN`` and ``id``.
    """
    records = []
    for sdf_path in glob(join(mols_dir, '*.sdf')):
        record = SDMolSupplier(sdf_path)[0].GetPropsAsDict()
        record['id'] = split(sdf_path)[-1].replace('.sdf', '')
        records.append(record)
    df = pd.DataFrame(records)

    # A prefix is the part of a prediction column name before the first "_".
    prefixes = sorted(
        {col.split('_')[0] for col in df.columns if 'prediction' in col})

    selected = ['molId']
    for prefix in prefixes:
        selected.extend(['%s_experimental'%prefix,
                         '%s_prediction'%prefix,
                         '%s_error'%prefix])
    selected.extend(['Similarity_Tanimoto', 'NN', 'id'])

    return df[selected]

示例#21

0

显示文件

文件： min_sdf.py 项目： SongXia-NYU/dataProviders

def min_sdf():
    """Write the lowest-energy conformer of every SDF in ``raw/openchem_logP_confs``.

    For each file the record with the smallest ``energy_abs`` property is
    written to ``raw/openchem_logP_mmff_sdfs/<id>.mmff.sdf``, where ``<id>`` is
    the file name up to its first ``.`` and then up to its first ``_``. Any
    error while handling a file is printed and that file is skipped.
    """
    files = glob("raw/openchem_logP_confs/*.sdf")
    for f in tqdm(files):
        try:
            suppl = SDMolSupplier(f, removeHs=False)
            lowest_e = np.inf
            selected_mol = None
            for mol in suppl:
                energy = float(mol.GetProp("energy_abs"))
                if energy < lowest_e:
                    lowest_e = energy
                    selected_mol = mol
            if selected_mol is not None:
                mol_id = osp.basename(f).split('.')[0].split('_')[0]
                writer = SDWriter(f"raw/openchem_logP_mmff_sdfs/{mol_id}.mmff.sdf")
                try:
                    writer.write(selected_mol)
                finally:
                    # Close explicitly so the record is flushed to disk; the
                    # writer was previously left open.
                    writer.close()
        except Exception as e:
            print(e)

示例#22

0

显示文件

def test_preprocess_mols(sdf_file, session_id):
    """Run ``preprocess_mols`` on the molecules of *sdf_file* and return its result.

    The ``uploads/<session_id>`` directory is removed before and after the
    call; a failure to remove it is printed and otherwise ignored.
    """

    def _purge_session_dir():
        try:
            rmtree(join('uploads', session_id))
        except Exception as err:
            print(err)

    _purge_session_dir()

    supplier = SDMolSupplier(sdf_file, removeHs=False)
    parsed = np.array([mol for mol in supplier if mol])
    df = preprocess_mols(parsed, session_id)

    _purge_session_dir()
    return df

示例#23

0

显示文件

文件： helpers.py 项目： santi921/ML_CO2

def rdk(dir="../data/sdf/DB/"):
    """Compute RDKit fingerprints for the files paired with HOMO data.

    ``merge_dir_and_data(dir=dir)`` supplies the file names and a list of
    ``name:homo:homo1`` records. A file is kept when its name without the last
    four characters equals the name of the record at the same index, or at the
    next index. Files that cannot be read or fingerprinted are skipped.

    :param dir: directory prefix prepended to every file name
    :return: tuple ``(names, fingerprints, homo, homo1, diff)`` where
        fingerprints is a numpy array and diff is ``homo - homo1`` per file
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    total = len(dir_fl_names)
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)
            fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=2)
        except Exception:
            # Unreadable file or unparsable first record: skip it.
            continue

        stem = item[0:-4]
        values = None
        try:
            # The record list may be shifted by one against the file list.
            for offset in (0, 1):
                fields = list_to_sort[tmp + offset].split(":")
                if fields[0] == stem:
                    values = (float(fields[1]), float(fields[2]))
                    break
        except (IndexError, ValueError):
            expected = (list_to_sort[tmp].split(":")[0]
                        if tmp < len(list_to_sort) else None)
            print(expected, stem)

        if values is not None:
            # Append to all lists together so they stay index-aligned.
            homo_temp, homo1_temp = values
            fingerprints.append(fp_rdk)
            names.append(item)
            homo.append(homo_temp)
            homo1.append(homo1_temp)
            diff.append(homo_temp - homo1_temp)

        sys.stdout.write("\r %s /" % tmp + str(total))
        sys.stdout.flush()

    return names, np.array(fingerprints), homo, homo1, diff

示例#24

0

显示文件

文件： main.py 项目： pk-organics/reac-space-exp

def check_sdf_matches(dg,
                      sdf_file,
                      draw_structures=True,
                      print_unmatching=False):
    """
    After generating the network, try to see if any structures match with those in SDF files
    These files were usually created manually, storing structures reported in experimental
    studies. The purpose is to match our simulations with experiments.

    Keyword arguments:
    dg               -- the derivation graph of the network
    sdf_file         -- path to the SDF file
    draw_structures  -- whether or not to print the matched structures
    print_unmatching -- whether or not to print the structures that did not match
    """
    matching_structs = []
    not_matching = []
    postSection('Matching Structures')
    print(f"Checking for matches with structures in {sdf_file}")
    sdfile = SDMolSupplier(sdf_file)
    for mol in sdfile:
        if mol is None:
            # The supplier yields None for records it cannot parse.
            continue
        Kekulize(mol)
        smi = MolToSmiles(mol, kekuleSmiles=True)
        mol_graph = smiles(smi, add=False)
        for v in dg.vertices:
            if v.graph.isomorphism(mol_graph) == 1:
                matching_structs.append(mol_graph)
                print(
                    f"Structure {v.graph.smiles} in the network matches a test set molecule!"
                )
        if mol_graph not in matching_structs:
            not_matching.append(mol_graph)

    # NOTE(review): `p` is not defined in this function; presumably a
    # module-level printer object - confirm.
    if draw_structures:
        for g in matching_structs:
            g.print(p)
    if print_unmatching:
        postSection("Structures not matched yet")
        for g in not_matching:
            g.print(p)

    total = len(sdfile)
    # Guard the percentage against an SDF without records.
    percent = 100 * len(matching_structs) / total if total else 0.0
    print(
        f"{len(matching_structs)} of {total} ({percent}%)  total structures in the SDF are in the reaction network."
    )

示例#25

0

显示文件

文件： helpers.py 项目： dbim-chem/ML_CO2

def morgan(bit_length=256, dir="../data/sdf/DB/", bit=True):
    """Return Morgan fingerprints (radius 2) for the files in *dir*.

    The first record of every (non-hidden) entry in *dir* is fingerprinted.

    :param bit_length: number of bits when *bit* is true
    :param dir: directory containing the SDF files
    :param bit: bit-vector fingerprint if true, otherwise the non-bit-vector
        Morgan fingerprint
    :return: numpy array built from the list of fingerprints (empty for an
        empty directory)
    """
    # List the directory directly instead of shelling out to `ls`: no shell
    # injection through dir, and names containing whitespace stay intact.
    file_names = sorted(
        name for name in os.listdir(dir) if not name.startswith("."))

    fingerprints = []
    for name in file_names:
        # Read from dir itself; the path used to be hard-coded to
        # "../data/sdf/DB/" regardless of the argument.
        mol = SDMolSupplier(os.path.join(dir, name))[0]
        if bit:
            fingerprints.append(
                AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=bit_length))
        else:
            fingerprints.append(AllChem.GetMorganFingerprint(mol, 2))

    return np.array(fingerprints)

示例#26

0

显示文件

文件： helpers.py 项目： santi921/ML_CO2

def aval(dir="../data/sdf/DB/", bit_length=256):
    """Compute Avalon fingerprints for the files paired with HOMO data.

    ``merge_dir_and_data(dir=dir)`` supplies the file names and a list of
    ``name:homo:homo1`` records. A file is kept when its name without the last
    four characters equals the name of the record at the same index, or at the
    next index. Files that cannot be read or fingerprinted are skipped.

    :param dir: directory prefix prepended to every file name
    :param bit_length: second argument passed to ``pyAvalonTools.GetAvalonFP``
    :return: tuple ``(names, fingerprints, homo, homo1, diff)`` where
        fingerprints is a numpy array and diff is ``homo - homo1`` per file
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    total = len(dir_fl_names)
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)
            fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], bit_length)
        except Exception:
            # Unreadable file or unparsable first record: skip it.
            continue

        stem = item[0:-4]
        values = None
        try:
            # The record list may be shifted by one against the file list.
            for offset in (0, 1):
                fields = list_to_sort[tmp + offset].split(":")
                if fields[0] == stem:
                    values = (float(fields[1]), float(fields[2]))
                    break
        except (IndexError, ValueError):
            expected = (list_to_sort[tmp].split(":")[0]
                        if tmp < len(list_to_sort) else None)
            print(expected, stem)

        if values is not None:
            # Append to all lists together so they stay index-aligned.
            homo_temp, homo1_temp = values
            fingerprints.append(fp_aval)
            names.append(item)
            homo.append(homo_temp)
            homo1.append(homo1_temp)
            diff.append(homo_temp - homo1_temp)

        sys.stdout.write("\r %s /" % tmp + str(total))
        sys.stdout.flush()

    # Return the fingerprints collected above; the original converted an
    # unrelated name (`layer`) instead.
    return names, np.array(fingerprints), homo, homo1, diff

示例#27

0

显示文件

def time_fps(sdf_file, png_file, radius):
    """Plot the ``speed_tests`` correlation per fingerprint size and save it.

    Truthy molecules of *sdf_file* (hydrogens kept) are passed to
    ``speed_tests`` together with *radius* and the sizes 2**5 .. 2**11. The
    returned correlations are plotted against the exponent, each point is
    annotated with its timing, and the figure is saved to *png_file*.
    """
    valid_mols = [mol for mol in SDMolSupplier(sdf_file, removeHs=False) if mol]

    log2_sizes = np.arange(5, 12)
    corrs, timings = speed_tests(valid_mols, radius, 2**log2_sizes)

    sns.set(font_scale=1.5)
    fig, axis = plt.subplots(1, 1, figsize=(7, 5), dpi=300)
    axis.plot(log2_sizes, corrs, '-o')

    axis.set_xlabel('size of fingerprint ($log_2$ scale)')
    axis.set_ylabel('Correlation')
    axis.set_xticks(log2_sizes)
    tick_labels = ['%d' % 2**exponent for exponent in log2_sizes]
    axis.set_xticklabels(tick_labels)

    # Put each timing slightly below its data point.
    for idx, seconds in enumerate(timings):
        anchor = (log2_sizes[idx], corrs[idx] - 0.05)
        axis.annotate('%.1fs' % seconds, anchor, fontsize='small')

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.2, right=0.95)
    fig.savefig(png_file)

示例#28

0

显示文件

def docking(k):
    """Dock the ligand of complex *k* against its protein with smina.

    All paths are derived from *k* and the module-level directory settings.
    Nothing is done when the smina output file already exists, when no ligand
    molecule can be loaded (SDF, then the RCSB SDF if present, then MOL2), or
    when no conformer could be embedded. Otherwise 20 conformers are embedded
    and UFF-optimized, the lowest-energy one is written as PDB, ligand and
    protein are converted to PDBQT with ``obabel`` when missing, and ``smina``
    is run with an autobox around the original ligand file.

    :param k: complex identifier used to build every file name
    """
    mol_id = k
    protein = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_protein.pdb")
    protein_pdbqt = os.path.join(pdbbind_dir, mol_id,
                                 f"{mol_id}_protein.pdbqt")
    ligand = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.sdf")
    ligand_mol2 = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.mol2")
    log_name = os.path.join(log_dir, f'{mol_id}.log')
    out_name = os.path.join(out_pdbqt_dir, f'{mol_id}_out.pdbqt')
    pdb_name = os.path.join(pdb_dir, f'{mol_id}.pdb')
    pdbqt_name = os.path.join(pdbqt_dir, f'{mol_id}.pdbqt')
    ligand_rcsb = os.path.join(rcsb_dir, mol_id, f"{mol_id}.sdf")

    # Already docked.
    if os.path.exists(out_name):
        return

    # Load the ligand, falling back to the alternative files.
    m = SDMolSupplier(ligand)[0]
    if m is None and os.path.exists(ligand_rcsb):
        m = SDMolSupplier(ligand_rcsb)[0]

    if m is None:
        m = Chem.MolFromMol2File(ligand_mol2)

    if m is None:
        return

    Chem.SanitizeMol(m)

    # Adding hydrogen atoms to molecule
    m = Chem.AddHs(m)
    cids = AllChem.EmbedMultipleConfs(
        m,
        numConfs=20,
    )
    cenergy = []
    for conf in cids:
        AllChem.UFFOptimizeMolecule(m, confId=conf)
        cenergy.append(
            AllChem.UFFGetMoleculeForceField(m, confId=conf).CalcEnergy())

    if not cenergy:
        # No conformer was embedded; min() below would raise ValueError.
        return
    min_idx = cenergy.index(min(cenergy))

    m = Chem.RemoveHs(m)
    w = PDBWriter(pdb_name)
    w.write(m, min_idx)
    w.close()

    # pdb to pdbqt (both of ligand and protein)
    # NOTE(review): paths are interpolated into shell command lines; paths
    # containing spaces or shell metacharacters will break these calls.
    if not os.path.exists(pdbqt_name):
        os.system(f'obabel {pdb_name} -O {pdbqt_name}')
    if not os.path.exists(protein_pdbqt):
        os.system(f'obabel {protein} -O {protein_pdbqt}')

    command = f"smina \
            -r {protein_pdbqt} \
            -l {pdbqt_name} \
            --autobox_ligand {ligand} \
            --autobox_add 8 \
            --exhaustiveness 8 \
            --log {log_name} \
            -o {out_name} \
            --cpu 1 \
            --num_modes 100 \
            --seed 0"

    os.system(command)

示例#29

0

显示文件

文件： io.py 项目： jir322/chembl_beaker

def _parseMolData(data):
    """Return the truthy molecules parsed from the SD data in *data*."""
    supplier = SDMolSupplier()
    supplier.SetData(str(data))
    return list(filter(None, supplier))

示例#30

0

显示文件

文件： sdf_to_smi.py 项目： czodrowskilab/Multiprotic-pKa-Processing

from sys import argv

from rdkit.Chem import SDMolSupplier, SmilesWriter

# Convert the SDF given as first argument into a SMILES file (second
# argument), without a header line.
sdm = SDMolSupplier(argv[1])
sw = SmilesWriter(argv[2], includeHeader=False, nameHeader='_Name')
try:
    for mol in sdm:
        # The supplier yields None for records it cannot parse; skip them
        # instead of passing None to the writer.
        if mol is not None:
            sw.write(mol)
finally:
    # Close even if a write fails so the output written so far is flushed.
    sw.close()