Exemplos de SDMolSupplier em Python, exemplos de rdkit.Chem.SDMolSupplier em Python

Exemplo n.º 1

0

Exibir arquivo

def _parseMolData(data):
    """Parse SD data into molecules, laying out those without coordinates.

    Every parsed molecule is written back to a molblock and each line that
    contains the text ``0.0000`` is examined. If all of those lines contain
    that text at least three times (the way an all-zero coordinate row
    looks), the molecule is sanitized and RDKit computes a 2D layout for it;
    otherwise the coordinates are left as drawn.

    Molecules lacking the ``_drawingBondsWedged`` property are sanitized
    before the molblock is generated. Records the supplier cannot parse are
    dropped. Returns the list of parsed molecules.
    """
    supplier = SDMolSupplier()
    supplier.SetData(str(data), sanitize=False)
    mols = [mol for mol in supplier if mol]

    for mol in mols:
        if not mol.HasProp("_drawingBondsWedged"):
            SanitizeMol(mol)

        zero_lines = [
            line for line in MolToMolBlock(mol).split("\n")
            if "0.0000" in line
        ]
        # Splitting on "0.0000" gives more than three pieces exactly when the
        # text occurs at least three times on the line.
        every_line_is_zero = all(
            len(line.split("0.0000")) > 3 for line in zero_lines
        )
        if every_line_is_zero:
            # check for overlapping molecules in the CTAB
            SanitizeMol(mol)
            Compute2DCoords(mol)

    return mols

Exemplo n.º 2

0

Exibir arquivo

 def parse_molblock(self, mb):
     """Return the first molecule read from molblock ``mb``.

     The text is handed to an ``SDMolSupplier`` and its first record is
     taken. ``None`` is returned when that record is falsy.
     """
     supplier = SDMolSupplier()
     supplier.SetData(mb)
     first = next(supplier)
     return first if first else None

Exemplo n.º 3

0

Exibir arquivo

def rd_kit(dir_sdf = "../data/sdf/"):
    """Plot fingerprint similarities of every file in ``dir_sdf`` to a baseline.

    The first entry of the directory (in sorted name order) is the baseline.
    For each entry the first molecule is fingerprinted with Morgan bit
    vectors, RDKit, Avalon and layered fingerprints, and the Tanimoto
    similarity to the baseline's fingerprint of the same family is recorded.
    Histograms of the Morgan and RDKit similarities are shown, and the mean
    RDKit similarity and the number of files are printed.

    Args:
        dir_sdf: directory holding the SD files.

    Raises:
        IndexError: if ``dir_sdf`` contains no (non-hidden) entries.
    """
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact. Hidden entries are skipped as "ls" does.
    # Names are ordered by code point, which can differ from a locale-aware
    # "ls" ordering.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith(".")
    )
    if not file_names:
        raise IndexError("no files found in " + repr(dir_sdf))

    bit_length = 1024

    baseline_mol = SDMolSupplier(os.path.join(dir_sdf, file_names[0]))[0]
    baseline_morgan = AllChem.GetMorganFingerprintAsBitVect(
        baseline_mol, 2, nBits=bit_length)
    baseline_rdk = AllChem.RDKFingerprint(baseline_mol, maxPath=2)
    baseline_aval = pyAvalonTools.GetAvalonFP(baseline_mol, 128)
    baseline_layer = AllChem.LayeredFingerprint(baseline_mol)

    sim_matrix_morgan = []
    sim_matrix_rdk = []
    sim_matrix_aval = []
    sim_matrix_layer = []

    tanimoto = DataStructs.TanimotoSimilarity
    for name in file_names:
        mol = SDMolSupplier(os.path.join(dir_sdf, name))[0]

        # NOTE(review): the baseline uses radius 2 / maxPath 2 while the
        # per-file fingerprints use radius 3 / maxPath 3 -- confirm that this
        # asymmetry is intended.
        fp_bit = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=bit_length)
        fp_rdk = AllChem.RDKFingerprint(mol, maxPath=3)
        fp_aval = pyAvalonTools.GetAvalonFP(mol, 128)
        fp_layer = AllChem.LayeredFingerprint(mol)

        sim_matrix_morgan.append(
            DataStructs.FingerprintSimilarity(baseline_morgan, fp_bit, metric=tanimoto))
        sim_matrix_rdk.append(
            DataStructs.FingerprintSimilarity(baseline_rdk, fp_rdk, metric=tanimoto))
        sim_matrix_aval.append(
            DataStructs.FingerprintSimilarity(baseline_aval, fp_aval, metric=tanimoto))
        sim_matrix_layer.append(
            DataStructs.FingerprintSimilarity(baseline_layer, fp_layer, metric=tanimoto))

    sim_matrix_morgan = np.array(sim_matrix_morgan)
    sim_matrix_rdk = np.array(sim_matrix_rdk)
    sim_matrix_aval = np.array(sim_matrix_aval)
    sim_matrix_layer = np.array(sim_matrix_layer)

    # Only the Morgan and RDKit similarities are plotted; the Avalon and
    # layered ones are computed but not shown.
    label_morgan = "morgan" + str(bit_length)
    plt.hist(sim_matrix_morgan, label = label_morgan)
    plt.hist(sim_matrix_rdk, label = "rdk2")
    print(np.mean(sim_matrix_rdk))
    print(len(file_names))
    plt.xlabel("Similarity to Baseline")
    plt.ylabel("Counts")
    plt.title("Different Fingerprinting Methods, Similarity to Baseline")
    plt.legend()
    plt.show()

Exemplo n.º 4

0

Exibir arquivo

Arquivo: utils.py Projeto: MikolajMizera/SelVie

def MCS_NN_search(sdf_file):
    """Draw a molecule and its nearest neighbour with MCS atoms highlighted.

    The first molecule of ``sdf_file`` is read (hydrogens removed) and its
    ``NN`` property is evaluated as a Python literal holding candidate ids.
    Each candidate is loaded from ``<id>.sdf`` in the same directory as
    ``sdf_file``. ``get_MCSs`` is run on the molecule against the candidates
    and both the molecule and the first returned neighbour are rendered with
    ``draw_base64``.

    Returns the pair ``(mol_img, nn_img)``.
    """
    session_dir = split(sdf_file)[0]
    query = SDMolSupplier(sdf_file, removeHs=True)[0]
    neighbour_ids = literal_eval(query.GetPropsAsDict()['NN'])

    neighbours = []
    for neighbour_id in neighbour_ids:
        neighbour_path = join(session_dir, '%d.sdf'%neighbour_id)
        neighbours.append(SDMolSupplier(neighbour_path, removeHs=True)[0])

    _mcss, query_matches, nn_mols, nn_matches = get_MCSs([query], neighbours)
    query_img = draw_base64(query, highlightAtoms=query_matches[0])
    neighbour_img = draw_base64(nn_mols[0], highlightAtoms=nn_matches[0])
    return query_img, neighbour_img

Exemplo n.º 5

0

Exibir arquivo

Arquivo: utils.py Projeto: MikolajMizera/SelVie

def parse_sdf(contents, filename):
    """Decode an uploaded file and try to parse it as an SDF.

    ``contents`` must be a ``"<content type>,<base64 payload>"`` string. The
    payload is written to a temporary file under ``uploads`` named after a
    freshly generated session id, parsed with ``SDMolSupplier`` (hydrogens
    kept) and the temporary file is removed again, also when parsing fails.

    Args:
        contents: the upload as ``"<content type>,<base64 payload>"``.
        filename: original file name; only ``.sdf`` (any case) is accepted.

    Returns:
        A tuple ``(mols, message, session_id)``. ``mols`` is the list of
        molecules that parsed successfully; it is empty when the file has the
        wrong extension or processing failed, in which case ``message``
        describes the problem.

    Raises:
        ValueError: if ``contents`` does not contain exactly one comma.
    """
    content_type, content_string = contents.split(',')
    decoded = b64decode(content_string)
    session_id = str(uuid4())

    try:
        if filename[-4:].lower() != '.sdf':
            return [], 'The file has a wrong format.', session_id

        # Generate random file name and save contents to a file
        unique_fname = join('uploads', '%s.sdf'%session_id)
        try:
            with open(unique_fname, 'w') as fh:
                fh.write(decoded.decode('utf-8'))

            supplier = SDMolSupplier(unique_fname, removeHs=False)
            n_mols = len(supplier)
            mols = [m for m in supplier if m]
        finally:
            # Clean up even when writing or parsing raised.
            try:
                remove(unique_fname)
            except FileNotFoundError:
                # The file was never created, nothing to clean up.
                pass
            except Exception as e:
                # This is not critical
                print(e)

        # Report "successfully parsed / records in file".
        return mols, 'Loaded %d/%d mols'%(len(mols), n_mols), session_id

    except Exception as e:
        print(e)
        return [], 'Error occured during processing of a file.', session_id

Exemplo n.º 6

0

Exibir arquivo

def file_to_mols(filepath):
    """Load molecules from a ``.smi``, ``.sd`` or ``.sdf`` file.

    SMILES files are read line by line (trailing whitespace stripped) and
    converted with ``smi_to_mol`` through ``process_map``, using
    ``a.worker`` workers and chunks of 100. SD files are read with
    ``SDMolSupplier``. In both cases falsy results are discarded.

    Raises:
        Exception: if ``filepath`` has none of the supported extensions.
    """
    if filepath.endswith('.smi'):
        print('Converting SMILES to list of Mols')
        sys.stdout.flush()
        with open(filepath) as infile:
            smiles_list = [line.rstrip() for line in infile]

        converted = process_map(smi_to_mol,
                                smiles_list,
                                chunksize=100,
                                max_workers=a.worker)
        return [m for m in converted if m]

    if filepath.endswith(('.sd', '.sdf')):
        return [mol for mol in SDMolSupplier(filepath) if mol]

    raise Exception('Invalid file: {}\n'.format(filepath) +
                    '.smi, .sd, or .sdf extension is expected')

Exemplo n.º 7

0

Exibir arquivo

Arquivo: get_random_mol.py Projeto: xduan7/biochem-graph

def get_random_mol() -> Mol:
    """Return a randomly chosen molecule from ``PROCESSED_SDF_PATH``.

    An index is drawn uniformly over all records of the file. The record at
    that index is asserted to be truthy before it is returned.
    """
    supplier = SDMolSupplier(PROCESSED_SDF_PATH)
    last_index = len(supplier) - 1
    picked = randint(0, last_index)
    assert supplier[picked]

    return supplier[picked]

Exemplo n.º 8

0

Exibir arquivo

Arquivo: descriptor_test.py Projeto: santi921/ML_CO2

def rd_kit_morgan(dir_sdf = "../data/sdf/"):
    """Return Morgan bit-vector fingerprints for every file in ``dir_sdf``.

    For each entry of the directory (sorted by name, hidden entries
    skipped) the first molecule is read and fingerprinted with radius 2 and
    256 bits.

    Args:
        dir_sdf: directory holding the SD files. The files are loaded from
            this directory rather than from a fixed path.

    Returns:
        ``np.array`` built from the list of fingerprints; empty when the
        directory has no entries.

    Raises:
        OSError: if ``dir_sdf`` cannot be listed.
    """
    bit_length = 256
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith(".")
    )

    fingerprints = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fingerprints.append(
            AllChem.GetMorganFingerprintAsBitVect(suppl[0], 2, nBits=bit_length))

    return np.array(fingerprints)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: descriptor_test.py Projeto: santi921/ML_CO2

def rd_kit_rd(dir_sdf = "../data/sdf/"):
    """Return RDKit-fingerprint similarities of each file to the first one.

    The first entry of ``dir_sdf`` (sorted by name, hidden entries skipped)
    is the baseline. The first molecule of every entry is fingerprinted with
    ``RDKFingerprint(maxPath=2)`` and compared with the baseline by Tanimoto
    similarity.

    Args:
        dir_sdf: directory holding the SD files. The files are loaded from
            this directory rather than from a fixed path.

    Returns:
        ``np.array`` of similarities, one per entry; empty when the
        directory has no entries.

    Raises:
        OSError: if ``dir_sdf`` cannot be listed.
    """
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith(".")
    )
    if not file_names:
        return np.array([])

    baseline = SDMolSupplier(os.path.join(dir_sdf, file_names[0]))
    baseline_rdk = AllChem.RDKFingerprint(baseline[0], maxPath=2)

    similarities = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=2)
        similarities.append(
            DataStructs.FingerprintSimilarity(
                baseline_rdk, fp_rdk, metric=DataStructs.TanimotoSimilarity))

    return np.array(similarities)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: descriptor_test.py Projeto: santi921/ML_CO2

def rd_kit_aval(dir_sdf = "../data/sdf/"):
    """Return 128-bit Avalon fingerprints for every file in ``dir_sdf``.

    For each entry of the directory (sorted by name, hidden entries
    skipped) the first molecule is read and fingerprinted with
    ``pyAvalonTools.GetAvalonFP``.

    Args:
        dir_sdf: directory holding the SD files. The files are loaded from
            this directory rather than from a fixed path.

    Returns:
        ``np.array`` built from the list of fingerprints; empty when the
        directory has no entries.

    Raises:
        OSError: if ``dir_sdf`` cannot be listed.
    """
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith(".")
    )

    fingerprints = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fingerprints.append(pyAvalonTools.GetAvalonFP(suppl[0], 128))

    return np.array(fingerprints)

Exemplo n.º 11

0

Exibir arquivo

def test_MCS(sdf_file):
    """Run nearest-neighbour search and MCS detection on ``sdf_file``.

    The molecules that parse (hydrogens kept) are compared with themselves:
    ``get_Tanimoto_NNs`` supplies neighbour indices that are passed on to
    ``get_MCSs``. The four results of ``get_MCSs`` are unpacked but not
    returned.
    """
    supplier = SDMolSupplier(sdf_file, removeHs=False)
    mols = np.array([mol for mol in supplier if mol])

    neighbour_ids = get_Tanimoto_NNs(
        mols, mols, 3, fps_nbits=512, order=1, nns=10)

    mcs_results = get_MCSs(mols, mols, nns_indices=neighbour_ids)
    MCSs, MCS_matches, NN_mols, NN_MCS_matches = mcs_results

Exemplo n.º 12

0

Exibir arquivo

def split(sdf, label_col, folder, splitfold=5):
    """
    Stratified splitting of an SD file into k folds.

    Records that cannot be parsed are skipped. For every fold a
    ``trainset_<fold>.sdf`` and a ``testset_<fold>.sdf`` file is written to
    the output folder.

    :param sdf: Path of the input SD file
    :param label_col: Name of the molecule property holding the labels used
        for stratification
    :param folder: Output folder/model name; if None, a folder named after
        the SD file (its name up to the first dot) is used next to the file
    :param splitfold: k number of folds
    :return: Tuple of a dict with the lists ``train_files`` and
        ``test_files``, and the output folder
    """

    if folder is None:
        sdf_path = pathlib.Path(sdf)
        sdf_name = sdf_path.name.partition('.')[0]

        folder = sdf_path.parent.joinpath(sdf_name)
        folder.mkdir(parents=True, exist_ok=True)
        folder = folder.absolute()
    else:
        pathlib.Path(folder).mkdir(parents=True, exist_ok=True)

    train_files = []
    test_files = []

    # The supplier yields None for records it cannot parse; those have no
    # label and cannot be written, so they are left out.
    mols = [mol for mol in SDMolSupplier(sdf) if mol is not None]
    labels = [mol.GetProp(label_col) for mol in mols]

    skf = StratifiedKFold(n_splits=splitfold)
    for fold, (train_ix, test_ix) in enumerate(skf.split(mols, labels)):
        outputs = (
            ("trainset", train_ix, train_files),
            ("testset", test_ix, test_files),
        )
        for prefix, indices, collected in outputs:
            set_fn = "{}/{}_{}.sdf".format(folder, prefix, fold)
            writer = SDWriter(set_fn)
            try:
                for i in indices:
                    writer.write(mols[i])
            finally:
                # Close the file even if writing a molecule fails.
                writer.close()
            collected.append(set_fn)

    return {'train_files': train_files,
            'test_files': test_files}, folder

Exemplo n.º 13

0

Exibir arquivo

Arquivo: io.py Projeto: bkbonde/chembl_beaker

def _parseMolData(data, sanitize=True, removeHs=True, strictParsing=True):
    """Parse SD data by way of a temporary file.

    ``data`` is written to a temporary file which is read with
    ``SDMolSupplier`` using the given options. The temporary file is removed
    afterwards, also when writing or parsing raises.

    Args:
        data: SD file contents; text is encoded as UTF-8 before writing.
        sanitize, removeHs, strictParsing: forwarded to ``SDMolSupplier``.

    Returns:
        List of the molecules that could be parsed (falsy records dropped).
    """
    if not isinstance(data, bytes):
        # os.write() only accepts bytes.
        data = data.encode("utf-8")

    fd, fpath = tempfile.mkstemp(text=True)
    try:
        try:
            os.write(fd, data)
        finally:
            os.close(fd)

        suppl = SDMolSupplier(fpath,
                              sanitize=sanitize,
                              removeHs=removeHs,
                              strictParsing=strictParsing)
        return [x for x in suppl if x]
    finally:
        os.remove(fpath)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: helpers.py Projeto: santi921/ML_CO2

def morgan(bit_length=256, dir="../data/sdf/DB3/", bit=True):
    """Collect Morgan fingerprints and HOMO values for the files in ``dir``.

    File names and data records come from ``merge_dir_and_data``. A record
    is a ``:``-separated string: its first field is compared with the file
    name minus its last four characters, the second and third fields are
    parsed as floats (``homo`` and ``homo1``). A file is kept when it matches
    the record at the same position or, failing that, the record one
    position later. Files that cannot be read or fingerprinted are skipped.

    Args:
        bit_length: number of bits when ``bit`` is true.
        dir: directory prefix prepended to each file name.
        bit: if equal to True, bit-vector fingerprints are generated,
            otherwise count-based Morgan fingerprints.

    Returns:
        ``(names, morgan, homo, homo1, diff)`` where ``morgan`` is an
        ``np.array`` of the fingerprints and ``diff`` holds
        ``homo - homo1`` per kept file.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    def _keep(name, fingerprint, record):
        # Parse both values first so a malformed record cannot leave the
        # result lists with different lengths.
        fields = record.split(":")
        homo_temp = float(fields[1])
        homo1_temp = float(fields[2])
        fingerprints.append(fingerprint)
        names.append(name)
        homo.append(homo_temp)
        homo1.append(homo1_temp)
        diff.append(homo_temp - homo1_temp)

    # Kept as an equality test so non-bool arguments behave as before.
    use_bits = (bit == True)

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    #---------------------------------------------------------------------------
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)

            try:
                if use_bits:
                    fp = AllChem.GetMorganFingerprintAsBitVect(
                        suppl[0], int(2), nBits=int(bit_length))
                else:
                    fp = AllChem.GetMorganFingerprint(suppl[0], int(2))
            except Exception:
                if not use_bits:
                    print("error")
                # Skip the file; otherwise the fingerprint of the previous
                # file would be stored under this file's name.
                continue

            stem = item[0:-4]
            if stem == list_to_sort[tmp].split(":")[0]:
                _keep(item, fp, list_to_sort[tmp])
            else:
                try:
                    if stem == list_to_sort[tmp + 1].split(":")[0]:
                        _keep(item, fp, list_to_sort[tmp + 1])
                except Exception:
                    print(list_to_sort[tmp].split(":")[0], stem)
            sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
            sys.stdout.flush()
        except Exception:
            # Best effort: a file that cannot be processed is left out.
            continue
    morgan = np.array(fingerprints)
    return names, morgan, homo, homo1, diff

Exemplo n.º 15

0

Exibir arquivo

Arquivo: helpers.py Projeto: dbim-chem/ML_CO2

def layer(dir="../data/sdf/DB/"):
    """Return layered fingerprints for every file in ``dir``.

    For each entry of the directory (sorted by name, hidden entries
    skipped) the first molecule is read and fingerprinted with
    ``AllChem.LayeredFingerprint``.

    Args:
        dir: directory holding the SD files. The files are loaded from this
            directory rather than from a fixed path.

    Returns:
        ``np.array`` built from the list of fingerprints; empty when the
        directory has no entries.

    Raises:
        OSError: if ``dir`` cannot be listed.
    """
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact.
    file_names = sorted(
        name for name in os.listdir(dir) if not name.startswith(".")
    )

    fingerprints = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir, name))
        fingerprints.append(AllChem.LayeredFingerprint(suppl[0]))

    return np.array(fingerprints)

Exemplo n.º 16

0

Exibir arquivo

Arquivo: helpers.py Projeto: dbim-chem/ML_CO2

def rdk(dir="../data/sdf/DB/"):
    """Return RDKit fingerprints for every file in ``dir``.

    For each entry of the directory (sorted by name, hidden entries
    skipped) the first molecule is read and fingerprinted with
    ``AllChem.RDKFingerprint(maxPath=2)``.

    Args:
        dir: directory holding the SD files. The files are loaded from this
            directory rather than from a fixed path.

    Returns:
        ``np.array`` built from the list of fingerprints; empty when the
        directory has no entries.

    Raises:
        OSError: if ``dir`` cannot be listed.
    """
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact.
    file_names = sorted(
        name for name in os.listdir(dir) if not name.startswith(".")
    )

    fingerprints = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir, name))
        fingerprints.append(AllChem.RDKFingerprint(suppl[0], maxPath=2))

    return np.array(fingerprints)

Exemplo n.º 17

0

Exibir arquivo

def sdf_to_info(sdf_folder, save_folder):
    """Write ``info.csv`` summarising the first molecule of each SD file.

    For every ``*.sdf`` file in ``sdf_folder`` one row is produced with the
    file name up to its first dot (``file_name``), the molecule's ``SMILES``
    property and its heavy-atom count (``n_heavy``). Rows are sorted by
    ``n_heavy`` and saved to ``<save_folder>/info.csv``. With no SD files,
    a CSV holding only the header is written.

    Args:
        sdf_folder: directory searched for ``*.sdf`` files.
        save_folder: directory that receives ``info.csv``.
    """
    columns = ["file_name", "SMILES", "n_heavy"]
    rows = []
    for sdf in glob(osp.join(sdf_folder, "*.sdf")):
        mol = SDMolSupplier(sdf)[0]
        rows.append({
            "file_name": osp.basename(sdf).split(".")[0],
            "SMILES": mol.GetProp("SMILES"),
            "n_heavy": mol.GetNumHeavyAtoms(),
        })

    # Build the frame in one go: DataFrame.append no longer exists in
    # pandas 2.x and copied the whole frame on every call before that.
    result = pd.DataFrame(rows, columns=columns)
    result = result.sort_values(by="n_heavy")
    result.to_csv(osp.join(save_folder, "info.csv"), index=False)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: helpers.py Projeto: dbim-chem/ML_CO2

def aval(dir="../data/sdf/DB/", bit_length=128):
    """Return Avalon fingerprints for every file in ``dir``.

    For each entry of the directory (sorted by name, hidden entries
    skipped) the first molecule is read and fingerprinted with
    ``pyAvalonTools.GetAvalonFP``.

    Args:
        dir: directory holding the SD files. The files are loaded from this
            directory rather than from a fixed path.
        bit_length: second argument passed to ``GetAvalonFP``.

    Returns:
        ``np.array`` built from the list of fingerprints; empty when the
        directory has no entries.

    Raises:
        OSError: if ``dir`` cannot be listed.
    """
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact.
    file_names = sorted(
        name for name in os.listdir(dir) if not name.startswith(".")
    )

    fingerprints = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir, name))
        fingerprints.append(pyAvalonTools.GetAvalonFP(suppl[0], bit_length))

    return np.array(fingerprints)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: descriptor_test.py Projeto: santi921/ML_CO2

def rd_kit_morgan(dir_sdf = "../data/sdf/"):
    """Return layered fingerprints for every file in ``dir_sdf``.

    Despite its name this function computes
    ``AllChem.LayeredFingerprint`` for the first molecule of each entry of
    the directory (sorted by name, hidden entries skipped).

    Args:
        dir_sdf: directory holding the SD files. The files are loaded from
            this directory rather than from a fixed path.

    Returns:
        ``np.array`` built from the list of fingerprints; empty when the
        directory has no entries.

    Raises:
        OSError: if ``dir_sdf`` cannot be listed.
    """
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact.
    file_names = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith(".")
    )

    fingerprints = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fingerprints.append(AllChem.LayeredFingerprint(suppl[0]))

    # Convert once, after the loop: converting inside it turned the list
    # into an array that no longer supports append().
    return np.array(fingerprints)

Exemplo n.º 20

0

Exibir arquivo

Arquivo: utils.py Projeto: MikolajMizera/SelVie

def load_props(mols_dir):
    """Load the properties of the first molecule of every SD file.

    Each ``*.sdf`` file in ``mols_dir`` contributes one row made of the
    molecule's property dict plus an ``id`` entry (the file name with
    ``.sdf`` removed). The returned frame is restricted to ``molId``, then
    the ``<name>_experimental``/``<name>_prediction``/``<name>_error``
    triple for each name that has a ``prediction`` column (names sorted),
    then ``Similarity_Tanimoto``, ``NN`` and ``id``.
    """
    records = []
    for sdf_path in glob(join(mols_dir, '*.sdf')):
        record = SDMolSupplier(sdf_path)[0].GetPropsAsDict()
        record['id'] = split(sdf_path)[-1].replace('.sdf', '')
        records.append(record)
    df = pd.DataFrame(records)

    # Limit dataframe only to necessary columns
    targets = sorted(
        {col.split('_')[0] for col in df.columns if 'prediction' in col})
    per_target = []
    for target in targets:
        per_target.extend(['%s_experimental'%target,
                           '%s_prediction'%target,
                           '%s_error'%target])
    wanted = ['molId'] + per_target + ['Similarity_Tanimoto', 'NN', 'id']

    return df[wanted]

Exemplo n.º 21

0

Exibir arquivo

Arquivo: min_sdf.py Projeto: SongXia-NYU/dataProviders

def min_sdf():
    """Save the lowest-energy conformer of every conformer SD file.

    Each ``*.sdf`` file in ``raw/openchem_logP_confs`` is read with
    hydrogens kept. The record with the smallest ``energy_abs`` property is
    written to ``raw/openchem_logP_mmff_sdfs/<id>.mmff.sdf``, where ``<id>``
    is the file name up to its first dot and then up to its first
    underscore. Records that cannot be parsed are skipped; any other error
    is printed and processing continues with the next file.
    """
    files = glob("raw/openchem_logP_confs/*.sdf")
    for f in tqdm(files):
        try:
            suppl = SDMolSupplier(f, removeHs=False)
            lowest_e = np.inf
            selected_mol = None
            for mol in suppl:
                if mol is None:
                    # Unparsable record: ignore it instead of giving up on
                    # the whole file.
                    continue
                energy = float(mol.GetProp("energy_abs"))
                if energy < lowest_e:
                    lowest_e = energy
                    selected_mol = mol
            if selected_mol is not None:
                writer = SDWriter(f"raw/openchem_logP_mmff_sdfs/{osp.basename(f).split('.')[0].split('_')[0]}.mmff.sdf")
                try:
                    writer.write(selected_mol)
                finally:
                    # Closing flushes the record to disk.
                    writer.close()
        except Exception as e:
            print(e)

Exemplo n.º 22

0

Exibir arquivo

def test_preprocess_mols(sdf_file, session_id):
    """Run ``preprocess_mols`` on ``sdf_file`` with a clean upload directory.

    The directory ``uploads/<session_id>`` is removed before and after the
    run; failures to remove it are printed and otherwise ignored. Only the
    molecules that parse (hydrogens kept) are passed on.

    Returns the result of ``preprocess_mols``.
    """

    def _purge_session_dir():
        try:
            rmtree(join('uploads', session_id))
        except Exception as e:
            print(e)

    _purge_session_dir()

    supplier = SDMolSupplier(sdf_file, removeHs=False)
    parsed = np.array([mol for mol in supplier if mol])
    df = preprocess_mols(parsed, session_id)

    _purge_session_dir()

    return df

Exemplo n.º 23

0

Exibir arquivo

Arquivo: helpers.py Projeto: santi921/ML_CO2

def rdk(dir="../data/sdf/DB/"):
    """Collect RDKit fingerprints and HOMO values for the files in ``dir``.

    File names and data records come from ``merge_dir_and_data``. A record
    is a ``:``-separated string: its first field is compared with the file
    name minus its last four characters, the second and third fields are
    parsed as floats (``homo`` and ``homo1``). A file is kept when it matches
    the record at the same position or, failing that, the record one
    position later. Files that cannot be read or fingerprinted are skipped.

    Args:
        dir: directory prefix prepended to each file name.

    Returns:
        ``(names, rdk, homo, homo1, diff)`` where ``rdk`` is an ``np.array``
        of ``RDKFingerprint(maxPath=2)`` fingerprints and ``diff`` holds
        ``homo - homo1`` per kept file.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    def _keep(name, fingerprint, record):
        # Parse both values first so a malformed record cannot leave the
        # result lists with different lengths.
        fields = record.split(":")
        homo_temp = float(fields[1])
        homo1_temp = float(fields[2])
        fingerprints.append(fingerprint)
        names.append(name)
        homo.append(homo_temp)
        homo1.append(homo1_temp)
        diff.append(homo_temp - homo1_temp)

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    #---------------------------------------------------------------------------
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)
            fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=2)

            stem = item[0:-4]
            if stem == list_to_sort[tmp].split(":")[0]:
                _keep(item, fp_rdk, list_to_sort[tmp])
            else:
                try:
                    if stem == list_to_sort[tmp + 1].split(":")[0]:
                        _keep(item, fp_rdk, list_to_sort[tmp + 1])
                except Exception:
                    print(list_to_sort[tmp].split(":")[0], stem)
            sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
            sys.stdout.flush()
        except Exception:
            # Best effort: a file that cannot be processed is left out.
            continue
    rdk = np.array(fingerprints)
    return names, rdk, homo, homo1, diff

Exemplo n.º 24

0

Exibir arquivo

Arquivo: main.py Projeto: pk-organics/reac-space-exp

def check_sdf_matches(dg,
                      sdf_file,
                      draw_structures=True,
                      print_unmatching=False):
    """
    After generating the network, try to see if any structures match with those in SDF files
    These files were usually created manually, storing structures reported in experimental
    studies. The purpose is to match our simulations with experiments.

    Every readable SDF record is kekulized, converted to SMILES and loaded
    as a graph, which is then compared with the graph of each vertex of
    ``dg``. A structure is counted once, however many vertices it matches.
    A summary line with the number and percentage of matched structures is
    printed at the end.

    Keyword arguments:
    dg               -- the derivation graph of the network
    sdf_file         -- path to the SDF file
    draw_structures  -- whether or not to print the matching structures in the summary pdf
    print_unmatching -- whether or not to also print the structures without a match
    """
    matching_structs = []
    not_matching = []
    postSection('Matching Structures')
    print(f"Checking for matches with structures in {sdf_file}")
    sdfile = SDMolSupplier(sdf_file)
    for mol in sdfile:
        if mol is None:
            # Record could not be parsed; there is nothing to compare.
            continue
        Kekulize(mol)
        smi = MolToSmiles(mol, kekuleSmiles=True)
        mol_graph = smiles(smi, add=False)
        matched = False
        for v in dg.vertices:  #dg_new.vertices
            if v.graph.isomorphism(mol_graph) == 1:
                if not matched:
                    # Record the structure once so the summary counts SDF
                    # structures, not matching vertices.
                    matching_structs.append(mol_graph)
                    matched = True
                print(
                    f"Structure {v.graph.smiles} in the network matches a test set molecule!"
                )
        if not matched:
            not_matching.append(mol_graph)
    if draw_structures == True:
        for g in matching_structs:
            g.print(p)
    if print_unmatching == True:
        postSection("Structures not matched yet")
        for g in not_matching:
            g.print(p)

    n_matched = len(matching_structs)
    n_total = len(sdfile)
    # An empty SDF would otherwise divide by zero.
    percent = 100 * n_matched / n_total if n_total else 0.0
    print(
        f"{n_matched} of {n_total} ({percent}%)  total structures in the SDF are in the reaction network."
    )

Exemplo n.º 25

0

Exibir arquivo

Arquivo: helpers.py Projeto: dbim-chem/ML_CO2

def morgan(bit_length=256, dir="../data/sdf/DB/", bit=True):
    """Return Morgan fingerprints for every file in ``dir``.

    For each entry of the directory (sorted by name, hidden entries
    skipped) the first molecule is read and fingerprinted with radius 2.

    Args:
        bit_length: number of bits of the bit-vector fingerprints.
        dir: directory holding the SD files. The files are loaded from this
            directory rather than from a fixed path.
        bit: if equal to True, bit-vector fingerprints are generated,
            otherwise count-based Morgan fingerprints.

    Returns:
        ``np.array`` built from the list of fingerprints; empty when the
        directory has no entries.

    Raises:
        OSError: if ``dir`` cannot be listed.
    """
    # Listing through the OS API instead of a shell "ls" keeps names that
    # contain whitespace intact.
    file_names = sorted(
        name for name in os.listdir(dir) if not name.startswith(".")
    )

    fingerprints = []
    for name in file_names:
        suppl = SDMolSupplier(os.path.join(dir, name))

        # Kept as an equality test so non-bool arguments behave as before.
        if bit == True:
            fingerprint = AllChem.GetMorganFingerprintAsBitVect(
                suppl[0], 2, nBits=bit_length)
        else:
            fingerprint = AllChem.GetMorganFingerprint(suppl[0], 2)
        fingerprints.append(fingerprint)

    return np.array(fingerprints)

Exemplo n.º 26

0

Exibir arquivo

Arquivo: helpers.py Projeto: santi921/ML_CO2

def aval(dir="../data/sdf/DB/", bit_length=256):
    """Collect Avalon fingerprints and HOMO values for the files in ``dir``.

    File names and data records come from ``merge_dir_and_data``. A record
    is a ``:``-separated string: its first field is compared with the file
    name minus its last four characters, the second and third fields are
    parsed as floats (``homo`` and ``homo1``). A file is kept when it matches
    the record at the same position or, failing that, the record one
    position later. Files that cannot be read or fingerprinted are skipped.

    Args:
        dir: directory prefix prepended to each file name.
        bit_length: second argument passed to ``GetAvalonFP``.

    Returns:
        ``(names, aval, homo, homo1, diff)`` where ``aval`` is an
        ``np.array`` of the fingerprints and ``diff`` holds
        ``homo - homo1`` per kept file.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    def _keep(name, fingerprint, record):
        # Parse both values first so a malformed record cannot leave the
        # result lists with different lengths.
        fields = record.split(":")
        homo_temp = float(fields[1])
        homo1_temp = float(fields[2])
        fingerprints.append(fingerprint)
        names.append(name)
        homo.append(homo_temp)
        homo1.append(homo1_temp)
        diff.append(homo_temp - homo1_temp)

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    #---------------------------------------------------------------------------
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)
            fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], bit_length)

            stem = item[0:-4]
            if stem == list_to_sort[tmp].split(":")[0]:
                _keep(item, fp_aval, list_to_sort[tmp])
            else:
                try:
                    if stem == list_to_sort[tmp + 1].split(":")[0]:
                        _keep(item, fp_aval, list_to_sort[tmp + 1])
                except Exception:
                    print(list_to_sort[tmp].split(":")[0], stem)
            sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
            sys.stdout.flush()
        except Exception:
            # Best effort: a file that cannot be processed is left out.
            continue
    # Convert the collected Avalon fingerprints (not an unrelated name).
    aval = np.array(fingerprints)
    return names, aval, homo, homo1, diff

Exemplo n.º 27

0

Exibir arquivo

def time_fps(sdf_file, png_file, radius):
    """Plot the ``speed_tests`` results per fingerprint size to ``png_file``.

    The molecules of ``sdf_file`` that parse (hydrogens kept) are passed to
    ``speed_tests`` together with ``radius`` and the sizes 2**5 .. 2**11.
    The returned correlations are plotted against the size exponent and each
    point is annotated with the matching timing in seconds.
    """
    supplier = SDMolSupplier(sdf_file, removeHs=False)
    mols = [mol for mol in supplier if mol]

    exponents = np.arange(5, 12)
    corrs, timings = speed_tests(mols, radius, 2**exponents)

    sns.set(font_scale=1.5)
    fig, ax = plt.subplots(1, 1, figsize=(7, 5), dpi=300)
    ax.plot(exponents, corrs, '-o')

    ax.set_xlabel('size of fingerprint ($log_2$ scale)')
    ax.set_ylabel('Correlation')
    ax.set_xticks(exponents)
    tick_labels = ['%d' % 2**exponent for exponent in exponents]
    ax.set_xticklabels(tick_labels)

    for idx, elapsed in enumerate(timings):
        # Place the timing label slightly below its data point.
        anchor = (exponents[idx], corrs[idx] - 0.05)
        ax.annotate('%.1fs' % elapsed, anchor, fontsize='small')

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.2, right=0.95)
    fig.savefig(png_file)

Exemplo n.º 28

0

Exibir arquivo

def docking(k):
    """Dock the ligand of entry ``k`` against its protein with smina.

    Nothing is done when the output pdbqt for ``k`` already exists. The
    ligand is read from the entry's SD file, falling back to the RCSB SD
    file (if present) and then to the mol2 file; the function returns
    silently when none of them yields a molecule. Twenty conformers are
    embedded and UFF-optimized, and the one with the lowest UFF energy is
    written as PDB. Ligand and protein are converted to pdbqt with
    ``obabel`` when those files are missing, and ``smina`` is run with the
    original ligand file defining the search box.
    """
    # mol_id = k.split("/")[-1]
    mol_id = k

    # Input files of the entry.
    protein = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_protein.pdb")
    protein_pdbqt = os.path.join(pdbbind_dir, mol_id,
                                 f"{mol_id}_protein.pdbqt")
    ligand = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.sdf")
    ligand_mol2 = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.mol2")

    # Output and intermediate files.
    log_name = os.path.join(log_dir, f'{mol_id}.log')
    out_name = os.path.join(out_pdbqt_dir, f'{mol_id}_out.pdbqt')
    pdb_name = os.path.join(pdb_dir, f'{mol_id}.pdb')
    pdbqt_name = os.path.join(pdbqt_dir, f'{mol_id}.pdbqt')
    ligand_rcsb = os.path.join(rcsb_dir, mol_id, f"{mol_id}.sdf")

    if os.path.exists(out_name):
        return

    # Generate 3D structure of ligand
    mol = SDMolSupplier(ligand)[0]
    if mol is None and os.path.exists(ligand_rcsb):
        mol = SDMolSupplier(ligand_rcsb)[0]

    if mol is None:
        mol = Chem.MolFromMol2File(ligand_mol2)

    if mol is None:
        return

    Chem.SanitizeMol(mol)

    # Adding hydrogen atoms to molecule
    mol = Chem.AddHs(mol)
    conf_ids = AllChem.EmbedMultipleConfs(
        mol,
        numConfs=20,
    )
    energies = []
    for conf_id in conf_ids:
        # The optimizer's return value is not used.
        AllChem.UFFOptimizeMolecule(mol, confId=conf_id)
        force_field = AllChem.UFFGetMoleculeForceField(mol, confId=conf_id)
        energies.append(force_field.CalcEnergy())
    min_idx = energies.index(min(energies))

    mol = Chem.RemoveHs(mol)
    writer = PDBWriter(pdb_name)
    writer.write(mol, min_idx)
    writer.close()

    # pdb to pdbqt (both of ligand and protein)
    if not os.path.exists(pdbqt_name):
        os.system(f'obabel {pdb_name} -O {pdbqt_name}')
    if not os.path.exists(protein_pdbqt):
        os.system(f'obabel {protein} -O {protein_pdbqt}')

    command = f"smina \
            -r {protein_pdbqt} \
            -l {pdbqt_name} \
            --autobox_ligand {ligand} \
            --autobox_add 8 \
            --exhaustiveness 8 \
            --log {log_name} \
            -o {out_name} \
            --cpu 1 \
            --num_modes 100 \
            --seed 0"

    os.system(command)

Exemplo n.º 29

0

Exibir arquivo

Arquivo: io.py Projeto: jir322/chembl_beaker

def _parseMolData(data):
    """Parse SD data given in memory and return the molecules that parsed.

    ``data`` is converted with ``str`` and handed to an ``SDMolSupplier``;
    falsy records are dropped from the returned list.
    """
    supplier = SDMolSupplier()
    supplier.SetData(str(data))

    parsed = []
    for mol in supplier:
        if mol:
            parsed.append(mol)
    return parsed

Exemplo n.º 30

0

Exibir arquivo

Arquivo: sdf_to_smi.py Projeto: czodrowskilab/Multiprotic-pKa-Processing

from sys import argv

from rdkit.Chem import SDMolSupplier, SmilesWriter

# Convert the SD file given as first argument into a SMILES file written to
# the path given as second argument (no header line, molecule names taken
# from the '_Name' property).
sdm = SDMolSupplier(argv[1])
sw = SmilesWriter(argv[2], includeHeader=False, nameHeader='_Name')
try:
    for mol in sdm:
        # The supplier yields None for records it cannot parse; writing
        # None would raise, so such records are skipped.
        if mol is None:
            continue
        sw.write(mol)
finally:
    # Close the output even if writing a molecule fails.
    sw.close()