Exemplo n.º 1
0
def search_cocrystals(filter_solvents=True):
    '''
    Search the whole CSD for structures that contain two different molecules
    and pass the search settings configured below.

    Side effects: prints the elapsed time and writes the identifiers and
    chemical names to 'new_all_cocrystals.csv'. Rows without a name, or whose
    name contains "solvate" or "clathrate", are left out of that file.

    :param bool filter_solvents: if True, discard every structure that has a
        component whose SMILES is in clean_smiles.SOLVENT_SMILES
    :return: the identifiers left after the optional solvent filter and the
        polymorph removal (the name-based filtering only affects the CSV)
    '''
    # time.clock() was removed in Python 3.8; perf_counter() replaces it
    start_time = time.perf_counter()
    csd = MoleculeReader('CSD')
    entry_reader = EntryReader('CSD')
    settings = search.Search.Settings()
    settings.only_organic = True
    settings.not_polymeric = True
    settings.has_3d_coordinates = True
    settings.no_disorder = True
    settings.no_errors = True
    settings.no_ions = True
    settings.no_metals = True

    pairs = []
    for entry in csd:
        if not settings.test(entry):
            continue
        mol = csd.molecule(entry.identifier)
        mol.normalise_labels()
        smi = mol.smiles
        if smi is None:
            continue
        # We make sure that the structure consists of two different molecules
        # (Remove() is presumably a de-duplicating helper - confirm)
        if len(Remove(smi.split('.'))) == 2:
            pairs.append(mol.identifier)

    # clean the list from solvents
    if filter_solvents:
        print('Solvates and hydrates will be removed')
        solvates = set()
        for identifier in pairs:
            mol = csd.molecule(identifier)
            if any(component.smiles in clean_smiles.SOLVENT_SMILES
                   for component in mol.components):
                solvates.add(mol.identifier)
        final_cocrystals = [x for x in pairs if x not in solvates]
    else:
        final_cocrystals = pairs

    # Clean the list from polymorphs
    cocrystals = remove_polymorphs(final_cocrystals)
    end_time = time.perf_counter()

    # Look the names up here, for the final identifiers only, so that they
    # are available whether or not the solvent filter ran.
    names = [entry_reader.entry(i).chemical_name for i in cocrystals]
    cocrystals_data = pd.concat(
        [pd.DataFrame(cocrystals, columns=['csd_id']),
         pd.DataFrame(names, columns=['name'])],
        axis=1)
    cocrystals_data = cocrystals_data.dropna(axis=0)
    dataset_cocrystals = cocrystals_data[
        ~cocrystals_data.name.str.contains("solvate")]
    dataset_cocrystals = dataset_cocrystals[
        ~dataset_cocrystals.name.str.contains("clathrate")]

    # print the time
    print(end_time - start_time)
    dataset_cocrystals.to_csv('new_all_cocrystals.csv', index=False)
    return cocrystals
Exemplo n.º 2
0
def get_smiles_from_csd():
    ''' Read each CSD identifier of the co-crystal dataset and collect the
    SMILES of its two components.

    :return: tuple ``(co_crystals, smiles1, smiles2)``: the dataframe read
        from 'datasets/train_data/cocrystals2020.csv' and two lists holding,
        per identifier, the first and the second distinct component SMILES
    '''
    co_crystals = pd.read_csv('datasets/train_data/cocrystals2020.csv',
                              encoding='latin1')
    # Open the database readers once, not once per identifier
    csd = MoleculeReader('CSD')
    csd_reader = io.EntryReader('CSD')
    smiles1 = []
    smiles2 = []
    year = []  # only consumed by the disabled export below
    for identifier in co_crystals.csd_id.values:
        year.append(csd_reader.entry(identifier).publication.year)
        # One SMILES per component; Remove() presumably drops duplicates
        smi = Remove(csd.molecule(identifier).smiles.split('.'))
        smiles1.append(smi[0])
        smiles2.append(smi[1])
    #cocrystal_data = pd.concat([co_crystals , pd.DataFrame(smiles1, columns=['smiles1']), pd.DataFrame(smiles2, columns=['smiles2']),
    #pd.DataFrame(year, columns=['year'])], axis=1)
    #cocrystal_data.to_csv('datasets/train_data/all_cocrystals_info.csv')
    return co_crystals, smiles1, smiles2
Exemplo n.º 3
0
    def get_ensemble(self, nrotations, charged=False):
        """
        Runs the hotspot calculation for every protein of the ensemble and
        returns the paths where the results are expected.

        :param nrotations: forwarded to the runner task; also used in the name of the result directory
        :param bool charged: forwarded to the runner task
        :return: list with one "<protein dir>/fullsize_hotspots_<nrotations>/out.zip" path per protein
        """
        reference_path = self.find_largest_ligand()
        lig = MoleculeReader(reference_path)[0]
        protein_file = join(dirname(reference_path), 'protein.mol2')
        prot = Protein.from_file(protein_file)
        # NOTE: the binding site is built but not used further in this method
        bs = Protein.BindingSiteFromMolecule(protein=prot,
                                             molecule=lig,
                                             distance=6.5)

        prot_paths = self.get_protein_paths()
        print(prot_paths)
        print(self.ensemble_name, len(prot_paths))

        task = ParalleliselRunner(prot_paths,
                                  nrotations,
                                  charged,
                                  data_source='KLIFS')
        luigi.build([task], local_scheduler=True, workers=30)

        result_dirname = "fullsize_hotspots_{}".format(nrotations)
        hot_paths = []
        for in_pdb in prot_paths:
            hot_paths.append(join(dirname(in_pdb), result_dirname, "out.zip"))
        return hot_paths
Exemplo n.º 4
0
    def scaled_score_ligands(self, tolerance):
        """
        Scores every docked ligand with the distance-scaled scorer and writes the scored ligands to "scored_docks.mol2".

        The mean of the heavy-atom partial_charge values (presumably where the scorer stores the per-atom
        scores - confirm), rounded to 2 decimals, is appended to each ligand identifier.
        :param int tolerance: How many gridpoints away is it acceptable for an atom to be from the nearest point of its corresponding map.
        :return: None
        """
        scorer = self.get_scorer_result()
        hotspot = scorer.get_hotspot()
        docked_path = join(self.hotspot_path, "docking_tmp",
                           "docked_ligands.mol2")

        # Score everything first, so the output file is only created once
        # all ligands were scored.
        scored_ligs = []
        for lig in MoleculeReader(docked_path):
            scored = scorer.get_scaled_score(lig, tolerance, hotspot)[0]
            atom_scores = [a.partial_charge for a in scored.heavy_atoms]
            suffix = "_{}".format(round(np.mean(atom_scores), 2))
            scored.identifier += suffix
            scored_ligs.append(scored)

        out_path = os.path.join(self.hotspot_path, "scored_docks.mol2")
        with MoleculeWriter(out_path) as writer:
            for ligand in scored_ligs:
                writer.write(ligand)
def main():
    """
    Generate conformers for every molecule of every SDF file in the input
    directory, using a pool of 8 worker processes.

    For each SDF file a directory named after the part of the file name
    before the first '_' is created under ``args.conformers_file_dir`` and
    made the working directory before the molecules are processed. The total
    elapsed time is printed at the end.
    """
    args = parse_arguments()
    # Resolve both directories up front: os.chdir() below would otherwise
    # change what a relative path points to after the first file.
    sdf_dir = os.path.abspath(args.sdf_dir)
    conformers_root = os.path.abspath(args.conformers_file_dir)
    list_of_sdf_files = list(read_sdf_file(sdf_dir))
    print(list_of_sdf_files)
    proc = Conformer_generator(args)
    p = Pool(8)
    t = TicToc()
    t.tic()

    try:
        for sdf_file in list_of_sdf_files:
            sdf_file_name = sdf_file.split('_')[0]
            print(sdf_file_name)
            full_directory_path = os.path.join(conformers_root, sdf_file_name)
            os.makedirs(full_directory_path)
            os.chdir(full_directory_path)
            try:
                list_of_molecules = list(
                    MoleculeReader(os.path.join(sdf_dir, sdf_file)))
                p.map(proc.generate_conformer, list_of_molecules)
            except Exception:
                # Best effort: report the file and carry on with the next
                # one, but let KeyboardInterrupt/SystemExit propagate.
                print("can not read sdf file {}".format(sdf_file))
    finally:
        # Always release the worker processes
        p.close()
        p.join()

    t.toc()
    print(t.elapsed)
Exemplo n.º 6
0
def hot_calc(inputs):
    """
    Run a hotspot calculation for one protein/ligand pair and write the
    full result together with its extracted best volume into the same
    directory.

    :param inputs: ``(pdb, het, pdir)``; the protein is read from
        ``<pdir>/<pdb>.pdb`` and the ligand from ``<pdir>/<pdb>_<het>.mol2``
    """
    pdb, het, pdir = inputs
    protein = Protein.from_file(os.path.join(pdir, f"{pdb}.pdb"))
    ligand_path = os.path.join(pdir, f"{pdb}_{het}.mol2")
    mol = MoleculeReader(ligand_path)[0]

    hr = Runner().from_protein(protein, nprocesses=3, cavities=mol)

    # Replace every map by its max-value-of-neighbours version
    for probe, grid in hr.super_grids.items():
        hr.super_grids[probe] = grid.max_value_of_neighbours()

    bv = Extractor(hr).extract_volume(volume=250)

    # smoothing
    for probe, grid in bv.super_grids.items():
        bv.super_grids[probe] = grid.gaussian(sigma=0.5)

    bv.identifier = "bestvol"
    hr.identifier = "hotspot"

    with HotspotWriter(pdir) as w:
        w.write([hr, bv])
Exemplo n.º 7
0
    def find_largest_ligand(self):
        """
        Looks for the heaviest ligand among the "*.sdf" files in the ligand directory.
        :return: file name (basename) of the ligand with the highest molecular weight, or None if there are no ligands
        """
        # Get the ligands for the proteins returned by SIENA
        print(self.lig_dir)
        mol_paths = glob(join(self.lig_dir, "*.sdf"))
        print(mol_paths)
        mols = MoleculeReader(mol_paths)

        # Map each ligand file name to the molecular weight of its molecule
        # (assumes mols.file_name lines up with the molecules - confirm)
        mw_dict = {}
        for m_fname, m in zip(mols.file_name, mols):
            mw_dict[basename(m_fname)] = m.molecular_weight

        print(mw_dict)
        # Heaviest first; ties are broken by file name
        ranked = sorted(zip(mw_dict.values(), mw_dict.keys()), reverse=True)
        try:
            largest_lig = ranked[0][1]
        except IndexError:
            print("SIENA found no ligands for ensemble {}".format(
                self.ensemble_name))
            return None

        print(largest_lig)
        return largest_lig
Exemplo n.º 8
0
    def dock(self):
        """
        Handle a docking run with GOLD.

        Writes self.protein to "<self.temp>/<self.hs_pdb>.mol2", defines the
        binding site as everything within 12.0 of the centre of geometry of
        self.ligand, docks the ligands in self.search_ligands (3 docks each,
        'plp' fitness function) and returns a reader over the output file.

        :return: a MoleculeReader over "<self.temp>/docked_ligands.mol2"
        """
        docker = Docker()

        # enables hotspot constraints
        docker.settings = hs_screening.DockerSettings()

        # the protein is handed to the docker as a file, so write it out first
        f = os.path.join(self.temp, self.hs_pdb + ".mol2")
        with MoleculeWriter(f) as w:
            w.write(self.protein)

        # setup: the binding site is built from the protein just registered
        docker.settings.add_protein_file(f)
        docker.settings.binding_site = docker.settings.BindingSiteFromPoint(protein=docker.settings.proteins[0],
                                                                            origin=self.ligand.centre_of_geometry(),
                                                                            distance=12.0)

        docker.settings.fitness_function = 'plp'
        docker.settings.autoscale = 10.
        docker.settings.output_directory = self.temp
        docker.settings.output_file = "docked_ligands.mol2"
        docker.settings.add_ligand_file(self.search_ligands, ndocks=3)

        # constraints (currently disabled; kept for reference)
        # docker.settings.add_constraint(
        #     docker.settings.TemplateSimilarityConstraint(type="all", template=self.ligand, weight=150)
        #)

        # extractor = best_volume.Extractor(hr=self.hr, volume=300, mode="global", mvon=False)
        # bv = extractor.extracted_hotspots[0]
        #
        # with hs_io.HotspotWriter(path=os.path.join(self.path, "bv")) as hw:
        #     hw.write(extractor.extracted_hotspots)
        #
        # hs = docker.settings.HotspotHBondConstraint.from_hotspot(protein=docker.settings.proteins[0],
        #                                                          hr=bv,
        #                                                          weight=150,
        #                                                          max_constraints=2)
        #
        # docker.settings.add_constraint(hs)
        # docker.settings.add_apolar_fitting_points(hr=self.hr)
        #
        # mol = Molecule(identifier="constraints")
        # for a in hs.atoms:
        #     mol.add_atom(Atom(atomic_symbol="C",
        #                       atomic_number=14,
        #                       label="Du",
        #                       coordinates=a.coordinates))
        #
        # with MoleculeWriter(os.path.join(self.path, "constaints.mol2")) as w:
        #     w.write(mol)

        # dock, then hand back a reader over the poses that were written
        docker.dock()
        return MoleculeReader(os.path.join(docker.settings.output_directory,
                                           docker.settings.output_file))
Exemplo n.º 9
0
 def _read_ligands(self):
     """
     Opens the "scored_ligands.mol2" file of every result directory found
     directly under self.stem.
     :return: a :class: ccdc.io.MoleculeReader instance over all files found
     """
     pattern = join(self.stem, "*", "scored_ligands.mol2")
     return MoleculeReader(glob(pattern))
    def test_func(self, unp_id):
        """
        Screen the actives and the decoys of one target against its query
        ligand and write the ranked scores.

        A new directory "<output_directory>/<unp_id>" is created and made the
        working directory; it receives the screened actives, the screened
        decoys and "<unp_id>_screening_scores.csv".

        :param unp_id: target identifier used to build all file and directory names
        """
        # Parse arguments
        args = self.args
        overlay_dir = args.query
        active_dir = args.actives_dir
        decoy_dir = args.decoys_dir
        param_dir = args.param_dir
        target_dirname = '{}'.format(unp_id)
        active_conformers_dir = os.path.join(args.conformers_dir_actives,
                                             target_dirname)
        decoy_conformers_dir = os.path.join(args.conformers_dir_decoys,
                                            target_dirname)

        # Input sets
        actives_to_screen = os.path.join(
            active_dir, '{}_active_3d_rdkit.sdf'.format(unp_id))
        print(actives_to_screen)
        decoys_to_screen = os.path.join(
            decoy_dir, '{}_decoy_3d_rdkit.sdf'.format(unp_id))
        print(decoys_to_screen)

        complete_output_dir = os.path.join(args.output_directory,
                                           target_dirname)

        # The query is the largest ligand of the target's overlay
        overlay_mol = self.fetch_overlay_mol(file_dir=overlay_dir, id=unp_id)
        print(overlay_mol)
        largest_ligand = self.select_largest_ligand(overlay_mol)
        print(largest_ligand)
        query = list(MoleculeReader(largest_ligand))
        print(query)

        settings = setup_screener(param_dir)
        os.makedirs(complete_output_dir)
        os.chdir(complete_output_dir)
        # Generate fields around the input query
        screener = Screener(query, settings=settings)

        output_name_actives = os.path.join(
            complete_output_dir, "{}_actives_screened.mol2".format(unp_id))
        print(output_name_actives)
        output_name_decoys = os.path.join(
            complete_output_dir, "{}_decoys_screened.mol2".format(unp_id))
        print(output_name_decoys)

        # Actives are labelled 1, decoys 0
        actives_scores = screen_molecules(screener, actives_to_screen, 1,
                                          active_conformers_dir,
                                          output_name_actives)
        decoys_scores = screen_molecules(screener, decoys_to_screen, 0,
                                         decoy_conformers_dir,
                                         output_name_decoys)
        print("writing scores")
        actives_scores.extend(decoys_scores)
        screening_scores = sorted(actives_scores)

        output_name_scores = os.path.join(
            complete_output_dir, "{}_screening_scores.csv".format(unp_id))
        write_scores(screening_scores, output_name_scores)
        print("finish:{}".format(unp_id))
Exemplo n.º 11
0
    def _get_out_maps(self, probe, grid_dict, return_probes=False):
        """
        private method

        Builds a sampler over the weighted maps, samples it with the probe molecule and stores the
        resulting grid for that probe in self.out_grids.
        :param str probe: probe identifier set in the Atomic Hotspot calculation
        :param dict grid_dict: dictionary with key = probe identifier and value = `hotspots.grid_extension.Grid`
        :param bool return_probes: optional, bool indicating if probe molecules should be returned
        :return: the sampled probes if return_probes is True, otherwise None
        """
        donor_grid = _SampleGrid('donor', grid_dict['donor'], _SampleGrid.is_donor)
        acceptor_grid = _SampleGrid('acceptor', grid_dict['acceptor'], _SampleGrid.is_acceptor)
        apolar_grid = _SampleGrid('apolar', grid_dict['apolar'], _SampleGrid.is_apolar)
        sample_grids = [apolar_grid, donor_grid, acceptor_grid]

        # The charged maps are only sampled when charged probes are enabled
        if self.charged_probes:
            sample_grids.append(_SampleGrid('negative', grid_dict['negative'], _SampleGrid.is_negative))
            sample_grids.append(_SampleGrid('positive', grid_dict['positive'], _SampleGrid.is_positive))

        self.sampler = self._Sampler(*sample_grids, settings=self.sampler_settings)

        probe_path = pkg_resources.resource_filename('hotspots', 'probes/')

        # Charged probes use the "test" probe file; every other probe uses the configured size
        if self.charged_probes and probe in ("negative", "positive"):
            size_tag = "test"
        else:
            size_tag = self.probe_size
        mol = MoleculeReader(join(probe_path, "rotate-{}_{}_flat.mol2".format(probe, size_tag)))[0]

        probes = self.sampler.sample(mol, probe=probe)

        # Collect the sampled grid that belongs to this probe
        for pg in self.sampler.probe_grids:
            if pg.name.lower() != probe:
                continue
            try:
                self.out_grids[pg.name].append(pg.grid)
            except KeyError:
                self.out_grids[pg.name] = [pg.grid]

        if return_probes:
            return probes
Exemplo n.º 12
0
 def _molecule(self, filename, no_assign_bond_types):
     """  Implementation detail - Return the first molecule of a file with
     normalised labels; bond types are assigned unless no_assign_bond_types
     is true.
     """
     mol = MoleculeReader(filename)[0]
     mol.normalise_labels()
     if no_assign_bond_types:
         return mol
     mol.assign_bond_types()
     return mol
Exemplo n.º 13
0
 def __init__(self, data, out_dir):
     """
     Stores the inputs, calls self._write() and then loads the first
     protein file plus one MoleculeReader per ligand file listed in
     self.files (self._write() is presumably what fills self.files).
     """
     self.data = data
     self.files = {'protein': [], 'ligands': []}
     self.out_dir = out_dir
     self._write()
     self._protein = Protein.from_file(self.files['protein'][0])
     # NOTE: this keeps the readers themselves, not the molecules in them
     ligand_readers = []
     for ligand_file in self.files['ligands']:
         ligand_readers.append(MoleculeReader(ligand_file))
     self._ligands = ligand_readers
Exemplo n.º 14
0
 def get_fragment(self):
     """
     Loads the reference fragment on first use: the first molecule of
     "scored_ligands.mol2" in the hotspot directory is stored in
     self.reference_fragment. Nothing happens if it is already set.
     :return: None
     """
     if self.reference_fragment:
         return
     ligs_p = join(self.hotspot_path, "scored_ligands.mol2")
     self.reference_fragment = MoleculeReader(ligs_p)[0]
 def select_largest_ligand(self, overlay_file):
     """
     Picks the molecule with the most atoms from an overlay file (the first
     one wins on ties) and builds the path of its own SDF file.
     :param overlay_file: file holding the overlaid molecules
     :return: "<pdb_ligand_dir>/<identifier of the largest molecule>.sdf"
     """
     overlay = list(MoleculeReader(overlay_file))
     largest = max(overlay, key=lambda m: len(m.atoms))
     return os.path.join(self.args.pdb_ligand_dir,
                         '{}.sdf'.format(largest.identifier))
    def setUp(self) -> None:
        """Create the two Arpeggio wrappers (2vta/LZ1 and 1xkk/FMM) and load
        the matching reference ligands from the test data directory."""
        self.tmpdir = os.path.abspath("testdata/wrapper_arpeggio/tmpdir")
        source = "file"
        self.data = f"testdata/wrapper_arpeggio/prepare/{source}"

        # 2vta with ligand LZ1
        lz1_tmpdir = os.path.join(self.tmpdir, "lz1")
        self.arpeggio_2vta = Arpeggio(
            pdb_code="2vta", hetid="LZ1", chain="A", tmpdir=lz1_tmpdir)
        lz1_file = os.path.join(self.data, "LZ1.mol2")
        self.testlz1 = MoleculeReader(lz1_file)[0]
        self.lz1_resid = "A/1301/"

        # 1xkk with ligand FMM
        fmm_tmpdir = os.path.join(self.tmpdir, "fmm")
        self.arpeggio_1xkk = Arpeggio(
            pdb_code="1xkk", hetid="FMM", chain="A", tmpdir=fmm_tmpdir)
        fmm_file = os.path.join(self.data, "FMM.mol2")
        self.testfmm = MoleculeReader(fmm_file)[0]
        self.fmm_resid = "A/91/"
Exemplo n.º 17
0
def run():
    """
    Run one GOLD docking job driven by a hotspot pharmacophore model.

    Command line arguments (read from sys.argv):
      1. parent: working directory of the target (must be an absolute path)
      2. score: passed to the pharmacophore model when it writes the
         GOLD constraints
      3. autoscale: passed to the GOLD configuration template
      4. run_id: name of the sub-directory of "gold_results" for this run
      5. crossminer file, relative to parent

    Writes the protein, the GOLD configuration, the docked ligands and a
    copy of "bestranking.lst" into "<parent>/gold_results/<run_id>".
    """
    # must be abspath
    parent = sys.argv[1]
    score = sys.argv[2]
    autoscale = sys.argv[3]
    run_id = sys.argv[4]
    crossminer_file = os.path.join(parent, sys.argv[5])

    # data
    conf_name = "hs_gold.conf"
    out_path = check_dir(os.path.join(parent, "gold_results"))
    out_path = check_dir(os.path.join(out_path, run_id))
    # "all" is the directory handed to the template; the raw GOLD output
    # is read back from it below
    junk = check_dir(os.path.join(out_path, "all"))
    hotspot = os.path.join(parent, "hotspot_pharmacophore", "out.zip")
    crystal_ligand = os.path.join(parent, "crystal_ligand.mol2")
    actives = os.path.join(parent, "actives_final.mol2")
    decoys = os.path.join(parent, "decoys_final.mol2")
    prot_file = os.path.join(out_path, "protein.mol2")

    # output protein: taken from the hotspot result identified as "bestvol"
    with hs_io.HotspotReader(hotspot) as reader:
        hr = [h for h in reader.read() if h.identifier == "bestvol"][0]

    with MoleculeWriter(prot_file) as w:
        w.write(hr.protein)

    # the pharmacophore model supplies the constraint section of the config
    hspm = HotspotPharmacophoreModel.from_file(crossminer_file)
    constraint_str = hspm.to_gold_conf(score=score)

    # create template
    gold_conf_str = template(autoscale, crystal_ligand, actives, decoys, junk,
                             prot_file, constraint_str)
    print(gold_conf_str)
    with open(os.path.join(out_path, conf_name), "w") as w:
        w.write(gold_conf_str)

    #  linux only: the executable is located through the GOLD_DIR variable
    gold_exe = os.path.join(os.environ["GOLD_DIR"], "bin/gold_auto")

    # run docking from inside the output directory, so the relative
    # configuration file name resolves
    # NOTE(review): the command is a shell string; paths containing spaces
    # would break it
    with PushDir(out_path):
        cmd = f"{gold_exe} {conf_name}"
        os.system(cmd)

    # process results
    docked = MoleculeReader(os.path.join(junk, "docked_ligands.mol2"))
    # make it consistent with other names
    with MoleculeWriter(os.path.join(out_path, "docked_ligand.mol2")) as w:
        for d in docked:
            # drop atoms with an unknown element before writing the pose
            # NOTE(review): atoms are removed while iterating d.atoms -
            # confirm d.atoms returns a fresh list on each access
            for atm in d.atoms:
                if atm.atomic_symbol == "Unknown":
                    d.remove_atom(atm)
            w.write(d)

    shutil.copyfile(os.path.join(junk, "bestranking.lst"),
                    os.path.join(out_path, "bestranking.lst"))
Exemplo n.º 18
0
def main():
    """
    Rank docking poses by their agreement with a set of hotspot maps and
    write the ranked poses, with their scores attached, to an SDF file.
    """
    # input files #############################
    mol_file = "data/gold_docking_poses.sdf"
    hotspot_files = "data/out.zip"
    output_file = "data/ranked.sdf"

    # Ranking criterion. Alternatives:
    #   ["apolar"]        rank on the apolar score
    #   ["simple_score"]  rank on the simple score
    # Here: rank on the donor and acceptor scores
    sort_on = ["donor", "acceptor"]
    ###########################################

    # read hotspots and molecules; the molecules are kept in a list so
    # they can retain new attributes
    mols = list(MoleculeReader(mol_file))
    hr = HotspotReader(hotspot_files).read()

    for probe, grid in hr.super_grids.items():
        hr.super_grids[probe] = grid.max_value_of_neighbours()

    # create a grid which can contain all docking poses
    pose_coords = set()
    for mol in mols:
        for atm in mol.heavy_atoms:
            pose_coords.add(atm.coordinates)
    small_blank = Grid.initalise_grid(coords=pose_coords, padding=2)

    # set the protein to -1 to detect clashing
    protein_grid = hr.super_grids["apolar"].copy_and_clear()
    for atm in hr.protein.atoms:
        protein_grid.set_sphere(point=atm.coordinates,
                                radius=atm.vdw_radius * 0.9,
                                value=-1,
                                scaling='None')
    protein_grid = _shrink(small=small_blank, big=protein_grid)

    # shrink hotspot maps to save time
    sub_grids = {}
    for probe, grid in hr.super_grids.items():
        sub_grids[probe] = _shrink(small=small_blank, big=grid) + protein_grid

    # score the mols
    for mol in mols:
        mol.data = example_score(sub_grids, mol, small_blank)
        simple = simple_score(hr, mol)
        mol.data.update({"simple_score": simple})

    # output ranked mols in sdf format with data attached
    _output_sdf(ranked_molecules(mols, sort_on), output_file)
Exemplo n.º 19
0
    def read(self, path):
        """
        Load the hits stored in a directory and append them to self.hits.

        The molecules come from "hits_mols.mol2"; the rmsd and activity of
        each one are taken from the first row of "hits_attr.csv" whose
        identifier matches the molecule.

        :param str path: directory containing both files
        """
        df = pd.read_csv(os.path.join(path, "hits_attr.csv"), index_col=0)
        mols = MoleculeReader(os.path.join(path, "hits_mols.mol2"))

        for mol in mols:
            matching_rows = df.loc[df.identifier == mol.identifier]
            rmsd = matching_rows["rmsd"].values.tolist()[0]
            activity = matching_rows["activity"].values.tolist()[0]
            self.hits.append(Hit(mol, rmsd, mol.identifier, activity))
Exemplo n.º 20
0
    def __init__(self):
        """
        Parse the command line, download the docked protein and the
        reference structure, align them, extract the reference ligand and
        compute the RMSD of every docking result against it.

        Side effects: downloads two PDB files into a fresh temporary
        directory and writes "reference.mol2" next to this source file.
        """
        # NOTE(review): super(self.__class__, self) recurses endlessly if
        # this class is ever subclassed - naming the class explicitly (or
        # zero-argument super() on Python 3) avoids that
        super(self.__class__, self).__init__(description=__doc__)
        # handle command line arguments
        self.add_argument('protein',
                          help='pdb_code of protein which was used in docking')

        self.add_argument('reference', help='pdb_code of reference')

        self.add_argument('chemical_id',
                          help='PDB identifier for the docked ligand')

        self.add_argument('results', help='path to results files')

        self.add_argument('-r',
                          '--chain_ref',
                          default='A',
                          help='Chain to used for alignment')

        self.add_argument('-p',
                          '--chain_protein',
                          default='A',
                          help='Chain to used for alignment')
        self.args = self.parse_args()
        # scratch directory for the downloaded structures
        self.tmp = tempfile.mkdtemp()

        # download protein
        PDBResult(self.args.protein).download(self.tmp)
        self.protein = Protein.from_file(
            os.path.join(self.tmp, self.args.protein + ".pdb"))
        self.protein.add_hydrogens()

        # download reference
        PDBResult(self.args.reference).download(self.tmp)
        ref = Protein.from_file(
            os.path.join(self.tmp, self.args.reference + ".pdb"))
        ref.add_hydrogens()

        # align the reference with the docked protein, then take the first
        # matching ligand from the aligned reference
        self.ref = self._align(self.protein, ref)
        self.reference_ligand = self._extract_ligands(
            protein=self.ref,
            ligand=self.args.chemical_id,
            chain=self.args.chain_ref)[0]

        # keep a copy of the reference ligand next to this source file
        with MoleculeWriter(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "reference.mol2")) as w:
            w.write(self.reference_ligand)

        # the results path is resolved relative to this source file
        self.results = MoleculeReader(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         self.args.results))

        # one RMSD per docking result, in reader order
        self.rmsd_values = []
        for l in self.results:
            self.rmsd_values.append(self.rmsd(l, self.reference_ligand))
Exemplo n.º 21
0
def search_cocrystals():
    '''
    Search the whole CSD for structures that contain two different molecules
    and pass the search settings configured below, then drop the structures
    containing a solvent component and the polymorphs.

    :return: the result of remove_polymorphs() applied to the identifiers
        that survived the two-molecule and the solvent filters
    '''
    csd = MoleculeReader('CSD')
    settings = search.Search.Settings()
    settings.only_organic = True
    settings.not_polymeric = True
    settings.has_3d_coordinates = True
    settings.no_disorder = True
    settings.no_errors = True
    settings.no_ions = True
    settings.no_metals = True

    # First pass: identifiers of everything that passes the settings
    identifiers = [entry.identifier for entry in csd if settings.test(entry)]

    # Second pass: keep the structures made of two different molecules
    csd_reader = MoleculeReader(identifiers)
    fin = []
    for hit in csd_reader:
        mol = csd_reader.molecule(hit.identifier)
        smi = mol.smiles
        if smi is None:
            continue
        # We make sure that the structure consists of two different molecules
        # (Remove() is presumably a de-duplicating helper - confirm)
        if len(Remove(smi.split('.'))) == 2:
            fin.append(mol.identifier)

    # clean the list from solvents; the solvent list is built once and
    # turned into a set so each membership test is O(1)
    solvent_smiles = set(solvents())
    solvated = set()
    for identifier in fin:
        mol = csd_reader.molecule(identifier)
        if any(component.smiles in solvent_smiles
               for component in mol.components):
            solvated.add(mol.identifier)
    final_cocrystals = [x for x in fin if x not in solvated]

    # Clean the list from polymorphs
    cocrystals = remove_polymorphs(final_cocrystals)
    return cocrystals
Exemplo n.º 22
0
def screen_molecules(screener, mols_to_screen, activity, conformers_dir,
                     output_name):
    """Run the ligand screener and write out the screened conformations.
    Return sorted list of ranked scores.

    For every molecule of the screening set, each file in ``conformers_dir``
    whose name starts with the molecule identifier is read as one set of
    conformers and screened.

    :param screener: object whose ``screen()`` method takes a list of conformer lists
    :param mols_to_screen: Screening set (file readable by MoleculeReader)
    :param activity: 1 if the molecule is active, 0 if it's a decoy
    :param conformers_dir: Directory holding the pre-generated conformer files
    :param output_name: File name for the result molecules
    :return: sorted list of ``(score, activity, identifier)`` tuples
    """
    # Read the molecules to screen
    screen_set = list(MoleculeReader(mols_to_screen))
    # The directory content does not change while screening: list it once
    conformer_files = list(read_mol2_file(conformers_dir))
    scores = []

    # The context manager closes the writer even if screening fails midway
    with MoleculeWriter(output_name) as molwriter:
        for mol in screen_set:
            mol_id = mol.identifier
            for conformers_file in conformer_files:
                if not conformers_file.startswith(mol_id):
                    continue
                print(conformers_file)
                conformers_file_path = os.path.join(conformers_dir,
                                                    conformers_file)
                print(conformers_file_path)

                # One inner list per molecule: all conformers of the file
                conformers = [list(MoleculeReader(conformers_file_path))]
                print(type(conformers))
                print("yeah!!!!!! start screening")
                res = screener.screen(conformers)  # Screening step
                scores.extend([(r.score, activity, r.identifier)
                               for r in res])
                # Write results
                for r in res:
                    molwriter.write(r.molecule)

    return sorted(scores)
 def docks_to_ref_rmsd(self):
     """
     Heavy-atom RMSD of every docked pose against the prepared reference
     ligand. Only meaningful for complete docking results.
     :return: list of RMSD values, one per docked ligand, in result order
     """
     docks = [l.molecule for l in self.docking_result.ligands]
     ref_lig = MoleculeReader(self.prepared_ligand_path)[0]

     rmsds = []
     for nd in docks:
         matched_atoms = self.match_heavy_atoms(ref_lig, nd)
         rmsds.append(
             MolecularDescriptors.rmsd(ref_lig,
                                       nd,
                                       exclude_hydrogens=True,
                                       atoms=matched_atoms))
     return rmsds
Exemplo n.º 24
0
    def test_docking_fitting_pts(self):
        """Read the 2vta hotspot result and build docking fitting points
        for the crystal ligand whose identifier contains "LZ1"."""
        with PushDir("testdata/2vta"):
            # read hotspot maps
            with HotspotReader(path="out.zip") as r:
                self.result = r.read()

            # first ligand with "LZ1" among its "_"-separated identifier parts
            candidates = []
            for ligand in MoleculeReader("crystal_ligand.sdf"):
                if "LZ1" in ligand.identifier.split("_"):
                    candidates.append(ligand)
            mol = candidates[0]
            print(mol.identifier)
            m = self.result._docking_fitting_pts(mol)
Exemplo n.º 25
0
def make_substructure_molecule(template_mol_path, query_mol_path):
    """
    Build a MOL block for the maximum common substructure (MCS) of two ligands.

    Both files are read with ``MoleculeReader`` (first molecule only) and
    converted with ``rdkitize_ccdc_mol`` before the MCS search.

    :param template_mol_path: path to the prepared template molecule (starting fragment)
    :param query_mol_path: path to the prepared query molecule (suggested follow-up)
    :return: MOL block string of the MCS, or ``None`` when the MCS search
        yields an empty SMARTS string or ``ReplaceSidechains`` returns nothing
    """
    template_mol_ccdc = MoleculeReader(template_mol_path)[0]
    template_mol = rdkitize_ccdc_mol(template_mol_ccdc)

    query_mol_ccdc = MoleculeReader(query_mol_path)[0]
    query_mol = rdkitize_ccdc_mol(query_mol_ccdc)
    print(query_mol)

    # find the maximum common substructure
    mcs_result = rdFMCS.FindMCS([template_mol, query_mol],
                                threshold=0.9,
                                completeRingsOnly=True)

    # An empty (or missing) SMARTS string means there is nothing to build.
    if not mcs_result.smartsString:
        return None

    patt = Chem.MolFromSmarts(mcs_result.smartsString, mergeHs=True)

    # keep only the core of the reference molecule
    ref = AllChem.ReplaceSidechains(template_mol, patt)
    if not ref:
        return None

    # Strip the dummy atoms that mark where the side chains were attached.
    core = AllChem.DeleteSubstructs(ref, Chem.MolFromSmiles('*'))
    core.UpdatePropertyCache()
    try:
        return Chem.MolToMolBlock(core)
    except Exception as e:
        # Fallback when the trimmed core cannot be written: embed the MCS
        # pattern itself, pinning each pattern atom to the position of its
        # matched template atom, then align it onto the template.
        t_match = template_mol.GetSubstructMatch(patt)
        print(e)
        Chem.SanitizeMol(
            patt,
            sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_KEKULIZE)
        cmap = {
            patt_idx: template_mol.GetConformer().GetAtomPosition(template_idx)
            for patt_idx, template_idx in enumerate(t_match)
        }
        AllChem.EmbedMolecule(patt, randomSeed=0xf00d, coordMap=cmap, maxAttempts=1000)
        AllChem.UFFOptimizeMolecule(patt)
        AllChem.AlignMol(patt, template_mol, atomMap=list(enumerate(t_match)))
        return Chem.MolToMolBlock(patt)
Exemplo n.º 26
0
 def get_largest_binding_site(self):
     """
     Build the binding site lying within 6.5 A of the largest ligand.

     The ligand file name comes from ``self.find_largest_ligand()`` and is
     read from ``self.lig_dir``. The protein is read from ``self.pdb_dir``
     using that same file name with every "sdf" substring replaced by "pdb".

     :return: the object produced by ``Protein.BindingSiteFromMolecule``
     """
     ligand_file = self.find_largest_ligand()
     ligand_path = join(self.lig_dir, ligand_file)
     ligand = MoleculeReader(ligand_path)[0]

     protein_path = join(self.pdb_dir, ligand_file.replace("sdf", "pdb"))
     protein = Protein.from_file(protein_path)

     return Protein.BindingSiteFromMolecule(protein=protein,
                                            molecule=ligand,
                                            distance=6.5)
Exemplo n.º 27
0
    def generate_fake(self, buriedness=False, weighted=False, superstar=True):
        """
        Build a small ``Results`` object filled with random grids for testing.

        The protein and the template molecule are read from
        ``testdata/6y2g_A``; one grid per interaction type ("apolar",
        "donor", "acceptor") is populated with randomly placed spheres.

        :param buriedness: if True, attach ``Grid.from_molecule(mol)`` as the
            buriedness grid, otherwise ``None``
        :param weighted: if True, attach random weighted SuperStar grids,
            otherwise ``None``
        :param superstar: if True, attach random SuperStar grids, otherwise
            ``None``
        :return: the assembled ``Results`` instance
        """

        def populate_grid(template, num_spheres, radius=1, value=8, scaling='linear'):
            # NOTE: range(1, num_spheres) runs num_spheres - 1 times, so that
            # is how many spheres end up in the grid.
            filled = template.copy_and_clear()
            for _ in range(1, num_spheres):
                # One random index per axis, kept 2 steps away from each edge.
                x, y, z = [
                    np.random.randint(low=2, high=steps - 2, size=1)
                    for steps in filled.nsteps
                ]
                centre = filled.indices_to_point(x, y, z)
                filled.set_sphere(point=centre,
                                  radius=radius,
                                  value=value,
                                  scaling=scaling)
            return filled

        protein = Protein.from_file("testdata/6y2g_A/binding_site.pdb")
        mol = MoleculeReader("testdata/6y2g_A/A_mol.mol2")[0]
        g = Grid.initalise_grid([atom.coordinates for atom in mol.atoms])

        buriedness_grid = Grid.from_molecule(mol) if buriedness else None

        interactions = ["apolar", "donor", "acceptor"]

        def random_probe_grids():
            # A fresh, independently randomised grid for each interaction type.
            return {
                probe: populate_grid(template=g, num_spheres=3)
                for probe in interactions
            }

        # Grid sets are generated in this fixed order so that the sequence of
        # random draws stays the same for a given seed.
        super_grids = random_probe_grids()
        superstar_grids = random_probe_grids() if superstar else None
        weighted_superstar_grids = random_probe_grids() if weighted else None

        return Results(super_grids=super_grids,
                       protein=protein,
                       buriedness=buriedness_grid,
                       superstar=superstar_grids,
                       weighted_superstar=weighted_superstar_grids)
Exemplo n.º 28
0
def main():
    """
    Write a reference ligand file for every target directory under the base path.

    For each sub-directory ``<pdb>`` the identifier of the first molecule in
    ``<pdb>_ligand.mol2`` is passed, together with the directory name, to
    ``ftp_download``; the returned molecule is written to ``<pdb>_ref.mol2``.
    """
    base = "/local/pcurran/leads_frag"

    # Collect the target directories up front, before any file is written.
    pdbs = []
    for entry in os.listdir(base):
        if os.path.isdir(os.path.join(base, entry)):
            pdbs.append(entry)

    for pdb in pdbs:
        target_dir = os.path.join(base, pdb)

        ligand_file = os.path.join(target_dir, f"{pdb}_ligand.mol2")
        hetid = MoleculeReader(ligand_file)[0].identifier

        reference = ftp_download(pdb, hetid)

        reference_file = os.path.join(target_dir, f"{pdb}_ref.mol2")
        with MoleculeWriter(reference_file) as writer:
            writer.write(reference)
Exemplo n.º 29
0
def ligand_map_search(base, t, num):
    """
    Run one ligand-pharmacophore query against a target's feature database.

    Reads ``structure_db/<t>.feat`` and ``ligand_pharmacophores/<num>.cm``
    under ``<base>/<t>``, searches the database with the query, and writes
    the timing report, the hit list and the enrichment statistics below
    ``ligand_pharmacophores/<num>``.

    :param base: root directory that contains the target directory
    :param t: target directory name, also used as the feature database stem
    :param num: stem of the query file and name of the output directory
    """
    timer = Timer()

    with timer(tag="screen"):
        target_dir = os.path.join(base, t)

        # inputs
        feature_db_file = os.path.join(target_dir, f"structure_db/{t}.feat")
        query_file = os.path.join(target_dir, f"ligand_pharmacophores/{num}.cm")
        actives = os.path.join(target_dir, "actives_final.mol2")
        decoys = os.path.join(target_dir, "decoys_final.mol2")

        # outputs
        output_dir = check_dir(
            os.path.join(target_dir, f"ligand_pharmacophores/{num}"))
        hit_dir = check_dir(os.path.join(output_dir, "hit_list"))
        # The result is unused here; the call is kept for whatever check_dir
        # does to the path (presumably creates it -- TODO confirm).
        check_dir(os.path.join(output_dir, "raw_scores"))
        time_file = os.path.join(output_dir, "time.txt")

        feat_db = Pharmacophore.FeatureDatabase.from_file(feature_db_file)
        query = Pharmacophore.Query.from_file(query_file)

        # Number of molecules in each screening set, passed on to rank_hits.
        totals = {
            label: len(MoleculeReader(path))
            for label, path in (("actives", actives), ("decoys", decoys))
        }

        hits = search(feat_db, query)

    with open(time_file, "w") as report:
        timer.report(report)
    hits.write(hit_dir)

    estats = rank_hits(hits.hits, "rmsd", totals, num, t)
    estats.to_csv(os.path.join(output_dir, "enrichment_stats.csv"))
Exemplo n.º 30
0
    def test_func(self, unp_id):
        """
        Screen the actives and decoys of one target against its query overlay.

        Input directories, the ``nconfs`` value and the output directory are
        read from ``self.args``. A new ``<output_directory>/<unp_id>``
        directory is created and made the working directory; the output
        paths built inside it are handed to ``screen_molecules`` and
        ``write_scores``.

        :param unp_id: identifier used to build every input and output file
            name (presumably a UniProt accession -- TODO confirm)
        """

        # Parse arguments
        overlay_dir = self.args.query
        active_dir = self.args.actives
        decoy_dir = self.args.decoys

        # Input sets are expected at <dir>/<unp_id>_{active,decoy}_3d_rdkit.sdf
        actives_to_screen = os.path.join(
            active_dir, '{}_active_3d_rdkit.sdf'.format(unp_id))
        print(actives_to_screen)
        decoys_to_screen = os.path.join(decoy_dir,
                                        '{}_decoy_3d_rdkit.sdf'.format(unp_id))
        print(decoys_to_screen)

        #nt = args.threads
        nc = self.args.nconfs
        output_dir = self.args.output_directory
        complete_output_dir = os.path.join(output_dir, '{}'.format(unp_id))
        overlay_mol = os.path.join(overlay_dir, '{}.sdf'.format(unp_id))
        print(overlay_mol)
        query = [m for m in MoleculeReader(overlay_mol)
                 ]  # Read the query mol or overlay of mols
        print("start screening")
        settings = setup_screener()
        # os.makedirs is called without exist_ok, so this raises if the
        # per-target directory already exists.
        os.makedirs(complete_output_dir)
        # NOTE: changes the working directory of the whole process.
        os.chdir(complete_output_dir)
        screener = Screener(
            query, settings=settings)  # Generate fields around the input query

        output_name_actives = os.path.join(
            complete_output_dir, "{}_actives_screened.mol2".format(unp_id))
        print(output_name_actives)
        output_name_decoys = os.path.join(
            complete_output_dir, "{}_decoys_screened.mol2".format(unp_id))
        print(output_name_decoys)
        # The third positional argument (1 / 0) presumably labels the set as
        # active / decoy in the returned scores -- TODO confirm against
        # screen_molecules.
        actives_scores = screen_molecules(
            screener, actives_to_screen, 1, nc,
            output_name_actives)  ### Screen set of actives
        decoys_scores = screen_molecules(
            screener, decoys_to_screen, 0, nc,
            output_name_decoys)  ### Screen set of decoys
        print("writing scores")
        # all_data is an alias, not a copy: extend() below also grows
        # actives_scores.
        all_data = actives_scores
        all_data.extend(decoys_scores)
        screening_scores = sorted(all_data)

        output_name_scores = os.path.join(
            complete_output_dir, "{}_screening_scores.csv".format(unp_id))
        write_scores(screening_scores, output_name_scores)