Пример #1
0
def dump_sd_data(mol):
    """Print the SD tag/value pairs stored on a molecule and on its conformers.

    Parameters
    ----------
    mol : OEChem molecule
        Molecule-level pairs are always printed. Conformer-level pairs are
        printed only when ``mol`` is an ``oechem.OEMol`` (or a subclass).
    """
    print("Data Attached at the molecule level:")
    for dp in oechem.OEGetSDDataPairs(mol):
        print(dp.GetTag(), ":", dp.GetValue())
    # isinstance instead of an exact type comparison, so subclasses of OEMol
    # also get their conformer data dumped.
    if isinstance(mol, oechem.OEMol):
        print("\n\n" + 10 * "-" + "Data Attached to Conformers:")
        for conf_id, conf in enumerate(mol.GetConfs()):
            print("Data attached to conformer {}:".format(conf_id))
            for dp in oechem.OEGetSDDataPairs(conf):
                print(dp.GetTag(), ":", dp.GetValue())
    print()
Пример #2
0
def get_sd_list(mol, taglabel):
    """
    Collect one SD value per conformer for the requested tag.

    Parameters
    ----------
    mol : OEMol with N conformers
    taglabel : string
        Text to look for in the SD tag names. The comparison is a
        case-insensitive substring match.

    Returns
    -------
    sdlist : list
        Value of the first matching tag of each conformer, in conformer
        order. A conformer without a matching tag contributes nothing, so
        the list can be shorter than N.

    """
    values = []
    for conformer in mol.GetConfs():
        matching = (
            pair.GetValue()
            for pair in oechem.OEGetSDDataPairs(conformer)
            if taglabel.lower() in pair.GetTag().lower()
        )
        # only the first hit of each conformer is recorded
        for value in matching:
            values.append(value)
            break
    return values
Пример #3
0
def SDF2CSV(ifs, csv):
    """Write the titles and SD data of every molecule in ``ifs`` as CSV.

    The stream is read twice: once to collect the set of SD tags (column
    order is first-seen order), then, after ``ifs.rewind()``, once more to
    emit one row per molecule. A molecule lacking a tag gets an empty cell.

    Parameters
    ----------
    ifs : molecule input stream providing ``GetOEGraphMols()`` and ``rewind()``
    csv : writable text file-like object receiving the CSV text
    """
    # The ``csv`` parameter shadows the standard-library module of the same
    # name, so the module is imported under an alias.
    import csv as csv_module

    # Pass 1: unique tags in first-seen order (dict keys give O(1) lookups).
    tags = {}
    for mol in ifs.GetOEGraphMols():
        for dp in oechem.OEGetSDDataPairs(mol):
            tags.setdefault(dp.GetTag(), None)

    ifs.rewind()

    # The csv writer quotes fields containing commas, quotes or newlines,
    # which plain string joining would turn into malformed rows.
    writer = csv_module.writer(csv, lineterminator='\n')
    writer.writerow(["Title"] + list(tags))

    # Pass 2: one row per molecule.
    for mol in ifs.GetOEGraphMols():
        row = [mol.GetTitle()]
        for tag in tags:
            if oechem.OEHasSDData(mol, tag):
                row.append(oechem.OEGetSDData(mol, tag))
            else:
                row.append('')
        writer.writerow(row)
Пример #4
0
 def items(self):
     """Yield a ``(tag, value)`` tuple for every SD data pair of ``self._mol``.

     NOTE: probably not thread-safe -- it is unclear what happens if a data
     pair is added or deleted while the iteration is in progress.
     """
     for pair in oechem.OEGetSDDataPairs(self._mol):
         tag = pair.GetTag()
         value = pair.GetValue()
         yield tag, value
Пример #5
0
def make_psi_input(mol, label, method, basisset, SPE=False, mem=None):
    """
    Build the text of a Psi4 input file for one conformer.

    Parameters
    ----------
    mol: single OEChem conformer with coordinates
    label: string - name of the molecule. Can be an empty string.
    method: string - specification of method (see Psi4 website for options)
    basisset: string - specification of basis set
    SPE: boolean - False (default) for geom opt. True for single point E calcns
    mem: string - specify Psi4 job memory. E.g. "2 Gb" "2000 Mb" "2000000 Kb"

    Returns
    -------
    inputstring: string - containing contents of whole input file for this conf

    Raises
    ------
    ValueError if an "atoms to freeze" SD tag holds fewer than four integers.

    """
    import re  # local import: only needed to parse the freeze tag

    parts = []
    # specify memory requirements, if defined
    if mem is not None:
        parts.append("memory %s\n" % mem)
    parts.append('molecule %s {\n' % label)
    # charge and multiplicity; multiplicity hardwired to singlet (usually is)
    parts.append('  %s 1' % oechem.OENetCharge(mol))
    # one line per atom: element symbol followed by x, y, z
    xyz = oechem.OEFloatArray(3)
    for atom in mol.GetAtoms():
        mol.GetCoords(atom, xyz)
        symbol = oechem.OEGetAtomicSymbol(atom.GetAtomicNum())
        parts.append('\n  %s %10.4f %10.4f  %10.4f'
                     % (symbol, xyz[0], xyz[1], xyz[2]))
    parts.append('\n  units angstrom\n}')

    # check if mol has a "freeze" tag
    for x in oechem.OEGetSDDataPairs(mol):
        if "atoms to freeze" not in x.GetTag():
            continue
        # The value is text. Parse whole integers rather than picking single
        # characters at fixed offsets, so indices >= 10 are handled too.
        # NOTE(review): assumes the value looks like "[1, 2, 3, 4]" -- confirm
        # against the code that writes this tag.
        frozen = re.findall(r"\d+", x.GetValue())
        if len(frozen) < 4:
            raise ValueError(
                "Expected four atom indices in SD tag %r, got %r"
                % (x.GetTag(), x.GetValue()))
        parts.append(
            "\n\nfreeze_list = \"\"\"\n  {} xyz\n  {} xyz\n  {} xyz\n  {} xyz\n\"\"\"".format(
                *frozen[:4]))
        parts.append("\nset optking frozen_cartesian $freeze_list")
        parts.append("\nset optking dynamic_level = 1\nset optking consecutive_backsteps = 2\nset optking intrafrag_step_limit = 0.1\nset optking interfrag_step_limit = 0.1")

    parts.append('\n\nset basis %s' % (basisset))
    # explicitly specify MP2 RI-auxiliary basis for Ahlrichs basis set
    # http://www.psicode.org/psi4manual/master/basissets_byfamily.html
    if method.lower() == 'mp2' and 'def' in basisset and basisset.lower() != 'def2-qzvpd':
        parts.append('\nset df_basis_mp2 %s-ri' % (basisset))
    parts.append('\nset freeze_core True')

    # specify command for type of calculation
    if SPE is False:
        parts.append('\noptimize(\'%s\')' % (method))
    else:
        parts.append('\nenergy(\'%s\')' % (method))
    return "".join(parts)
Пример #6
0
def sanitize_fragment(mol):
    """Delete every SD tag of ``mol`` that is not on the approved list.

    The molecule is modified in place; nothing is returned.
    """
    approved_tags = {
        "TORSION_ATOMPROP",
        "TORSION_ATOMS_FRAGMENT",
        "TORSION_ATOMS_ParentMol",
        "COUNT",
    }

    # Collect the unwanted tags first and delete afterwards: removing SD data
    # while the data-pair iterator is still in use would mutate the very
    # collection being walked.
    unwanted = [
        dp.GetTag()
        for dp in oechem.OEGetSDDataPairs(mol)
        if dp.GetTag() not in approved_tags
    ]
    for tag in unwanted:
        oechem.OEDeleteSDData(mol, tag)
Пример #7
0
def reorder_sd_props(mol: oechem.OEGraphMol):
    """Rewrite the SD data of ``mol`` with TOTAL_STRAIN_TAG set first.

    All tag/value pairs are read, the SD data is cleared, and the pairs are
    set again starting with TOTAL_STRAIN_TAG followed by the remaining pairs
    in their original order. The molecule is modified in place.
    """
    strain = oechem.OEGetSDData(mol, TOTAL_STRAIN_TAG)

    ordered = [(TOTAL_STRAIN_TAG, strain)]
    ordered.extend(
        (dp.GetTag(), dp.GetValue())
        for dp in oechem.OEGetSDDataPairs(mol)
        if dp.GetTag() != TOTAL_STRAIN_TAG
    )

    oechem.OEClearSDData(mol)
    for tag, value in ordered:
        oechem.OESetSDData(mol, tag, value)
    # A second read/clear/set pass used to follow here; it re-wrote the same
    # pairs in the same order, so it was dropped as redundant.
Пример #8
0
    def process(self, mol, port):
        """Run the NPT simulation for one molecule and emit the result.

        Cube options are copied, optionally overridden by the molecule's
        "temperature" / "pressure" SD tags, and passed to
        ``simtools.simulation``. On success the packed molecule is emitted on
        ``self.success``; on any exception the traceback is logged, the error
        text is attached to the molecule and it is emitted on ``self.failure``.
        """
        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)

            # Override cube simulation parameters with molecule SD tags, if any
            new_args = {
                dp.GetTag(): dp.GetValue()
                for dp in oechem.OEGetSDDataPairs(mol)
                if dp.GetTag() in ["temperature", "pressure"]
            }

            if new_args:
                for k in new_args:
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # not numeric: keep the raw SD value
                        pass
                self.log.info(
                    "Updating parameters for molecule: {}\n{}".format(
                        mol.GetTitle(), new_args))

                opt.update(new_args)

            # Label used in the log line below. It defaults to the title so
            # that a molecule without a 'Structure' tag no longer triggers a
            # NameError on the previously unbound ``gd``.
            id_tag = mol.GetTitle()
            if utils.PackageOEMol.checkTags(mol, ['Structure']):
                gd = utils.PackageOEMol.unpack(mol)
                id_tag = gd['IDTag']
                opt['outfname'] = '{}-{}'.format(id_tag,
                                                 self.opt['outfname'])

            mdData = utils.MDData(mol)

            opt['molecule'] = mol

            self.log.info('START NPT SIMULATION %s' % id_tag)
            simtools.simulation(mdData, **opt)

            packedmol = mdData.packMDData(mol)

            self.success.emit(packedmol)

        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(mol)

        return
Пример #9
0
def get_sd_list(mol, datum, package='Psi4', method=None, basisset=None, taglabel=None):
    """
    Collect one SD value per conformer of ``mol``.

    Parameters
    ----------
    mol:        OEChem molecule with all of its conformers
    datum:      string description of property of interest
        options implemented: "QM opt energy" "MM opt energy"
    package:    software package used for QM calculation. Psi4 or Turbomole.
    method:     string, for specific properties. e.g. 'mp2'
    basisset:   string, for specific properties. e.g. '6-31+G(d)'
    taglabel : string
        tag text to look for; when None it is built by ``define_tag`` from
        datum, package, method and basisset. Matching is a case-insensitive
        substring test against each tag name.

    Returns
    -------
    sdlist: list with, per conformer, either the matched tag's value or the
        string 'nan' when a "note on opt." tag says the optimization
        "did not finish". The first SD pair that meets either condition
        decides; a conformer meeting neither adds no entry.
    """
    if taglabel is None:
        taglabel = define_tag(datum, package, method, basisset)

    collected = []
    for conformer in mol.GetConfs():
        for pair in oechem.OEGetSDDataPairs(conformer):
            tag_lower = pair.GetTag().lower()

            # Case: opt did not finish --> record nan
            unfinished = ("note on opt." in tag_lower
                          and "did not finish" in pair.GetValue().lower())
            if unfinished:
                collected.append('nan')
                break

            # Case: this is the requested tag --> record its value
            if taglabel.lower() in tag_lower:
                collected.append(pair.GetValue())
                break

    return collected
Пример #10
0
    def FilterMolData(self, mol):
        """Strip SD data from ``mol`` that is not listed in ``self.fields``.

        Returns 0 when the molecule has no SD data, or when ``self.fields``
        is empty (all SD data is then cleared). Returns -1 when
        ``self.fields`` is None, leaving the molecule untouched. Otherwise
        returns the number of SD tags kept. With ``self.asFloating`` set, a
        listed tag whose value cannot be converted by ``float()`` is reported
        with a warning and dropped as well. If nothing is kept, all SD data
        is cleared.
        """
        if not oechem.OEHasSDData(mol):
            return 0
        if self.fields is None:
            return -1
        if len(self.fields) == 0:
            oechem.OEClearSDData(mol)
            return 0

        kept = 0
        rejected = []
        for pair in oechem.OEGetSDDataPairs(mol):
            name = pair.GetTag()
            wanted = name in self.fields
            if wanted:
                raw = oechem.OEGetSDData(mol, name)
                if self.asFloating:
                    try:
                        float(raw)
                    except ValueError:
                        oechem.OEThrow.Warning("Failed to convert %s to numeric value (%s) in %s" %
                                               (name, raw, mol.GetTitle()))
                        wanted = False

            if wanted:
                kept += 1
            else:
                rejected.append(name)

        if kept:
            for name in rejected:
                oechem.OEDeleteSDData(mol, name)
        else:
            oechem.OEClearSDData(mol)

        return kept
Пример #11
0
    def process(self, solute, port):
        """Solvate one solute molecule and emit the solvated system.

        Cube options are copied and optionally overridden by the solute's
        "solvents", "molar_fractions" and "density" SD tags before being
        passed to ``oesolvate``. On success the solvated system (carrying the
        solute's title) is emitted on ``self.success``; on any exception the
        traceback is logged, the error text is attached to the solute and it
        is emitted on ``self.failure``.
        """
        try:
            opt = dict(self.opt)
            # Override cube parameters with molecule SD tags, if any
            new_args = {
                dp.GetTag(): dp.GetValue()
                for dp in oechem.OEGetSDDataPairs(solute)
                if dp.GetTag() in ["solvents", "molar_fractions", "density"]
            }
            if new_args:
                for k in new_args:
                    # molar_fractions is passed through as text
                    if k == 'molar_fractions':
                        continue
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # not numeric (e.g. a solvent name): keep the raw value
                        pass
                self.log.info(
                    "Updating parameters for molecule: {}\n{}".format(
                        solute.GetTitle(), new_args))
                opt.update(new_args)

            # Solvate the system
            sol_system = oesolvate(solute, **opt)
            self.log.info("Solvated System atom number {}".format(
                sol_system.NumAtoms()))
            sol_system.SetTitle(solute.GetTitle())
            self.success.emit(sol_system)
        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            solute.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(solute)

        return
Пример #12
0
            docked_molecule = oechem.OEGraphMol()
            oechem.OEReadMolecule(ifs, docked_molecule)

    if docked_molecule is None:
        print('No docking poses available')
        import sys
        sys.exit(0)

    import os
    from openeye import oechem, oedocking

    # Write molecule as CSV with cleared SD tags
    output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - docked.csv')
    if not os.path.exists(output_filename):
        docked_molecule_clean = docked_molecule.CreateCopy()
        for sdpair in oechem.OEGetSDDataPairs(docked_molecule_clean):
            if sdpair.GetTag() not in ['Hybrid2', 'fragments', 'site', 'docked_fragment']:
                oechem.OEDeleteSDData(docked_molecule_clean, sdpair.GetTag())
        with oechem.oemolostream(output_filename) as ofs:
            oechem.OEWriteMolecule(ofs, docked_molecule_clean)

    # Write molecule as SDF
    output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.sdf')
    if not os.path.exists(output_filename):
        with oechem.oemolostream(output_filename) as ofs:
            oechem.OEWriteMolecule(ofs, docked_molecule)

    # Write molecule as mol2
    output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.mol2')
    if not os.path.exists(output_filename):
        with oechem.oemolostream(output_filename) as ofs:
Пример #13
0
 def keys(self):
     """Yield the tag of every SD data pair of ``self._mol``.

     NOTE: probably not thread-safe -- it is unclear what happens if a data
     pair is added or deleted while the iteration is in progress.
     """
     pairs = oechem.OEGetSDDataPairs(self._mol)
     for pair in pairs:
         tag = pair.GetTag()
         yield tag
Пример #14
0
def construct_dihedral_energy_profile(torsion_conformers, num_points=24):
    """Merge per-angle torsion conformers and build their energy profile.

    Parameters
    ----------
    torsion_conformers : sequence of molecules; falsy entries are skipped.
        Each valid entry's active conformer carries a "CONFORMER_LABEL" SD
        tag whose last two characters are parsed as the angle index, and a
        "PSI4_ENERGY" SD tag.
    num_points : int
        Number of evenly spaced angles covering 0..360 degrees.

    Returns
    -------
    output_conformers : OEMol holding one conformer per valid input entry
    torsional_strain : array with columns (angle, energy relative to the
        minimum); angles without an energy are marked with -1
    torsion_inchi_list : list of torsion InChI keys built for the fragment

    Raises
    ------
    ValueError if ``torsion_conformers`` holds no valid entry.
    """
    angle_list = np.array([360 * i / num_points for i in range(num_points)])

    profile = np.full(num_points, np.nan)
    fragment_label = None
    last_valid_mol = None
    for mol in torsion_conformers:
        if not mol:
            continue
        last_valid_mol = mol
        conf = mol.GetActive()
        conf_title = get_sd_data(conf, "CONFORMER_LABEL")
        tor_atoms = get_sd_data(mol, "TORSION_ATOMS_ParentMol").split()
        parent_name = conf_title[:-3]
        dih_label = "_".join(str(x) for x in tor_atoms)
        fragment_label = parent_name + "_" + dih_label
        angle_idx = int(conf_title[-2:])

        # builtin float: the np.float alias no longer exists in current NumPy
        energy = float(get_sd_data(conf, "PSI4_ENERGY"))
        profile[angle_idx] = energy
        logging.debug("angle_idx: %d", angle_idx)
        logging.debug("Psi4 Energy: %f", energy)

    if last_valid_mol is None:
        # Without this guard the code below failed on unbound locals.
        raise ValueError("torsion_conformers contains no valid conformer")

    # check for angles where no energies are available; index with the
    # element-wise NaN mask (np.all collapsed it to a single boolean)
    for angle in angle_list[np.isnan(profile)]:
        logging.warning(
            "Warning: No energies found for angle {:.1f} for fragment: {}".
            format(angle, fragment_label))

    # calculate relative energies
    min_energy = np.nanmin(profile)
    profile -= min_energy
    profile[np.isnan(profile)] = -1  # set nans to -1
    torsional_strain = np.column_stack((angle_list, profile))

    # combine conformers
    output_conformers = oechem.OEMol(torsion_conformers[0])
    output_conformers.DeleteConfs()
    title = fragment_label
    output_conformers.SetTitle(title)

    # setup normalization; read the tag from the last valid entry rather
    # than the last iterated one, which may have been a skipped falsy item
    torsion_tag = "TORSION_ATOMS_FRAGMENT"
    torsion_atoms_in_fragment = get_sd_data(last_valid_mol, torsion_tag).split()
    logging.debug("torsion atoms in fragment: %s", torsion_atoms_in_fragment)
    # SD tag indices are shifted down by one before use
    dihedral_atom_indices = [int(x) - 1 for x in torsion_atoms_in_fragment]
    dih, _ = get_dihedral(output_conformers, dihedral_atom_indices)

    for old_conf in torsion_conformers:
        if old_conf:
            new_conf = output_conformers.NewConf(old_conf)
            normalize_coordinates(new_conf, dih)
            oechem.OEClearSDData(new_conf)
            for dp in oechem.OEGetSDDataPairs(old_conf.GetActive()):
                if dp.GetTag() not in ["OEConfTitle", "CONFORMER_LABEL"]:
                    oechem.OESetSDData(new_conf, dp.GetTag(), dp.GetValue())
            torsion_angle = get_sd_data(old_conf, "TORSION_ANGLE")
            title = fragment_label + ": Angle " + torsion_angle
            new_conf.SetTitle(title)

    write_energy_profile_to_sddata(output_conformers, torsional_strain.copy())

    # Calculate all possible torsion inchi keys for this fragment
    torsion_inchi_list = []
    inchi_key = oechem.OECreateInChIKey(output_conformers)
    _, b, c, _ = get_torsion_oeatom_list(output_conformers)
    for a in b.GetAtoms(oechem.OEIsHeavy()):
        for d in c.GetAtoms(oechem.OEIsHeavy()):
            # skip neighbours that are the central bond atoms themselves
            if a.GetIdx() == c.GetIdx() or d.GetIdx() == b.GetIdx():
                continue

            torsion_inchi = inchi_key + get_modified_inchi_key(
                output_conformers, [a, b, c, d])
            torsion_inchi_list.append(torsion_inchi)

    return output_conformers, torsional_strain, torsion_inchi_list
Пример #15
0
def IdentifyMinima(mol, tag, ThresholdE, ThresholdRMSD):
    """
    For a molecule's set of conformers computed with some level of theory,
        whittle down unique conformers based on energy and RMSD.

    Parameters
    ----------
    mol           OEChem molecule with all of its conformers
    tag           string name of the SD tag in this molecule; matched as a
        case-insensitive substring of the actual tag names
    ThresholdE    float value for abs(E1-E2), below which 2 confs are "same"
        Units are hartrees (default output units of Psi4)
    ThresholdRMSD float value for RMSD, below which 2 confs are "same"
        Units are in Angstrom (Psi4 default)

    Returns
    -------
    boolean True if successful filter + delete. False if there's only
        one conf and it lacks the tag, or if every conformer lacks an
        energy value.

    """
    # Parameters for OpenEye RMSD calculation
    automorph = True
    heavyOnly = False
    overlay = True

    # declare variables for conformers to delete
    confsToDel = set()
    delCount = 0

    tag_lower = tag.lower()

    def has_tag(obj):
        # True when any SD tag of obj contains the requested text
        return any(tag_lower in x.GetTag().lower()
                   for x in oechem.OEGetSDDataPairs(obj))

    # Single conformer: success just means the tag is present. Every SD pair
    # is examined (previously only the first one decided the result). The
    # conformer is checked too, since the energies below are read per
    # conformer.
    if mol.NumConfs() == 1:
        if has_tag(mol):
            return True
        return any(has_tag(conf) for conf in mol.GetConfs())

    # Loop over conformers twice (NxN diagonal comparison of RMSDs)
    for confRef in mol.GetConfs():
        print(" ~ Reference: %s conformer %d" %
              (mol.GetTitle(), confRef.GetIdx() + 1))

        # get real tag (correct for capitalization); reset per conformer so a
        # conformer without the tag cannot reuse a stale or unbound label
        taglabel = None
        for x in oechem.OEGetSDDataPairs(confRef):
            if tag_lower in x.GetTag().lower():
                taglabel = x.GetTag()

        # delete cases that don't have energy (opt not converged; or other)
        if taglabel is None or not oechem.OEHasSDData(confRef, taglabel):
            confsToDel.add(confRef.GetIdx())
            delCount += 1
            continue
        refE = float(oechem.OEGetSDData(confRef, taglabel))
        # MM (not Psi4) energies are converted to Hartrees before comparing
        is_mm = 'mm' in taglabel.lower()

        for confTest in mol.GetConfs():
            # upper right triangle comparison
            if confTest.GetIdx() <= confRef.GetIdx():
                continue
            # skip cases already set for removal
            if confTest.GetIdx() in confsToDel:
                continue
            # delete cases that don't have energy
            if not oechem.OEHasSDData(confTest, taglabel):
                confsToDel.add(confTest.GetIdx())
                continue

            testE = float(oechem.OEGetSDData(confTest, taglabel))
            absERel = abs(refE - testE)
            if is_mm:
                absERel = absERel / 627.5095
            # if energies are diff enough --> confs are diff --> keep & skip ahead
            if absERel > ThresholdE:
                continue
            # if energies are similar, see if they are diff by RMSD
            rmsd = oechem.OERMSD(confRef, confTest, automorph, heavyOnly,
                                 overlay)
            # if measured_RMSD < threshold_RMSD --> confs are same --> delete
            if rmsd < ThresholdRMSD:
                confsToDel.add(confTest.GetIdx())

    # for the same molecule, delete tagged conformers
    print("%s original number of conformers: %d" %
          (mol.GetTitle(), mol.NumConfs()))
    if delCount == mol.NumConfs():
        # all conformers in this mol has been tagged for deletion
        return False
    for conf in mol.GetConfs():
        if conf.GetIdx() in confsToDel:
            print('Removing %s conformer index %d' %
                  (mol.GetTitle(), conf.GetIdx()))
            if not mol.DeleteConf(conf):
                oechem.OEThrow.Fatal("Unable to delete %s GetIdx() %d" \
                                  % (mol.GetTitle(), conf.GetIdx()))
    return True
Пример #16
0
            files_missing = False
            for phase in ['complex', 'ligand']:
                for ext in ['gro', 'top']:
                    filename = os.path.join(
                        gromacs_basedir,
                        f'{molecule.GetTitle()} - {phase}.{ext}')
                    if not os.path.exists(filename):
                        files_missing = True
            if files_missing:
                continue

            # Add RUN number
            oechem.OESetSDData(molecule, 'run', f'RUN{run_index}')

            if args.clean:
                for sdpair in oechem.OEGetSDDataPairs(molecule):
                    if sdpair.GetTag() not in [
                            'Hybrid2', 'docked_fragment', 'fragments', 'site',
                            'run'
                    ]:
                        oechem.OEDeleteSDData(molecule, sdpair.GetTag())

            # Copy files
            run_dir = os.path.join(args.docked_basedir, 'fah-gromacs',
                                   f'RUN{run_index}')
            os.makedirs(run_dir, exist_ok=True)
            import shutil
            for phase in ['complex', 'ligand']:
                for ext in ['gro', 'top']:
                    src = os.path.join(
                        gromacs_basedir,
Пример #17
0
 def checkSDData(molecule):
     """Return the SD data of ``molecule`` as a ``{tag: value}`` dictionary.

     If a tag occurs more than once, the value seen last is the one kept.
     """
     return {
         pair.GetTag(): pair.GetValue()
         for pair in oechem.OEGetSDDataPairs(molecule)
     }
Пример #18
0
    def process(self, solvated_system, port):
        """Run a Yank solvation free-energy experiment for one solvated system.

        The system is split into components, cube options are optionally
        overridden by the solute's "temperature" / "pressure" SD tags, and
        OpenMM systems are built for the solvated and the isolated solute
        structures. Both are written as PDB + XML files to a temporary
        directory, then the Yank experiment is run and analysed. The solute,
        tagged with 'DG_yank_solv' and 'dG_yank_solv', is emitted on
        ``self.success``. On any exception the traceback is logged, the error
        text is attached to ``solvated_system`` and it is emitted on
        ``self.failure``.
        """
        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)

            # Split the complex in components
            protein, solute, water, excipients = oeommutils.split(solvated_system, ligand_res_name='LIG')

            # Override cube simulation parameters with molecule SD tags, if any
            new_args = {dp.GetTag(): dp.GetValue() for dp in oechem.OEGetSDDataPairs(solute) if dp.GetTag() in
                        ["temperature", "pressure"]}

            if new_args:
                for k in new_args:
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # not numeric: keep the raw SD value
                        pass
                self.log.info("Updating parameters for molecule: {}\n{}".format(solute.GetTitle(), new_args))
                opt.update(new_args)

            # Extract the MD data
            mdData = data_utils.MDData(solvated_system)
            solvated_structure = mdData.structure

            # Extract the ligand parmed structure
            solute_structure = solvated_structure.split()[0][0]
            solute_structure.box = None

            # Set the ligand title
            solute.SetTitle(solvated_system.GetTitle())

            # Create the solvated and vacuum system
            solvated_omm_sys = solvated_structure.createSystem(nonbondedMethod=app.PME,
                                                               nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                               constraints=app.HBonds,
                                                               removeCMMotion=False)

            solute_omm_sys = solute_structure.createSystem(nonbondedMethod=app.NoCutoff,
                                                           constraints=app.HBonds,
                                                           removeCMMotion=False)

            # This is a note from:
            # https://github.com/MobleyLab/SMIRNOFF_paper_code/blob/e5012c8fdc4570ca0ec750f7ab81dd7102e813b9/scripts/create_input_files.py#L114
            # Fix switching function.
            for force in solvated_omm_sys.getForces():
                if isinstance(force, openmm.NonbondedForce):
                    force.setUseSwitchingFunction(True)
                    force.setSwitchingDistance((opt['nonbondedCutoff'] - 1.0) * unit.angstrom)

            # Write out all the required files and set-run the Yank experiment
            with TemporaryDirectory() as output_directory:

                opt['Logger'].info("Output Directory {}".format(output_directory))

                solvated_structure_fn = os.path.join(output_directory, "solvated.pdb")
                solvated_structure.save(solvated_structure_fn, overwrite=True)

                solute_structure_fn = os.path.join(output_directory, "solute.pdb")
                solute_structure.save(solute_structure_fn, overwrite=True)

                # Context managers close the XML files even if a write fails
                solvated_omm_sys_serialized = XmlSerializer.serialize(solvated_omm_sys)
                solvated_omm_sys_serialized_fn = os.path.join(output_directory, "solvated.xml")
                with open(solvated_omm_sys_serialized_fn, 'w') as solvated_f:
                    solvated_f.write(solvated_omm_sys_serialized)

                solute_omm_sys_serialized = XmlSerializer.serialize(solute_omm_sys)
                solute_omm_sys_serialized_fn = os.path.join(output_directory, "solute.xml")
                with open(solute_omm_sys_serialized_fn, 'w') as solute_f:
                    solute_f.write(solute_omm_sys_serialized)

                # Build the Yank Experiment
                yaml_builder = ExperimentBuilder(yank_solvation_template.format(
                                                 verbose='yes' if opt['verbose'] else 'no',
                                                 minimize='yes' if opt['minimize'] else 'no',
                                                 output_directory=output_directory,
                                                 timestep=opt['timestep'],
                                                 nsteps_per_iteration=opt['nsteps_per_iteration'],
                                                 number_iterations=opt['iterations'],
                                                 temperature=opt['temperature'],
                                                 pressure=opt['pressure'],
                                                 solvated_pdb_fn=solvated_structure_fn,
                                                 solvated_xml_fn=solvated_omm_sys_serialized_fn,
                                                 solute_pdb_fn=solute_structure_fn,
                                                 solute_xml_fn=solute_omm_sys_serialized_fn))

                # Run Yank
                yaml_builder.run_experiments()

                exp_dir = os.path.join(output_directory, "experiments")

                # Calculate solvation free energy, solvation Enthalpy and their errors
                DeltaG_solvation, dDeltaG_solvation, DeltaH, dDeltaH = yankutils.analyze_directory(exp_dir)

                # Add result to the original molecule in kcal/mol
                oechem.OESetSDData(solute, 'DG_yank_solv', str(DeltaG_solvation))
                oechem.OESetSDData(solute, 'dG_yank_solv', str(dDeltaG_solvation))

            # Emit the ligand
            self.success.emit(solute)

        except Exception as e:
            # Attach an error message to the molecule that failed
            self.log.error(traceback.format_exc())
            solvated_system.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(solvated_system)

        return
Пример #19
0
def DumpSDData(mol):
    """Log the title and every SD tag/value pair of ``mol`` at INFO level."""
    # Messages are format strings with lazy arguments; passing a tuple as the
    # message logged the tuple's repr instead of readable text.
    logging.info("SD data of %s", mol.GetTitle())
    # loop over SD data
    for dp in oechem.OEGetSDDataPairs(mol):
        logging.info("%s : %s", dp.GetTag(), dp.GetValue())
    # logging.info() requires a message argument; calling it with none raised
    # TypeError. An empty record keeps the trailing separator line.
    logging.info("")
Пример #20
0
def make_psi_input(mol, label, method, basisset, calctype='opt', mem=None):
    """
    Get coordinates from input mol, and generate/format input text for
    Psi4 calculation.

    Parameters
    ----------
    mol : OpenEye OEMol
        OEMol with coordinates
    label : string
        Name of molecule with integer identifier (for conformers).
    method : string
        Name of the method as understood by Psi4. Example: "mp2"
    basisset : string
        Name of the basis set as understood by Psi4. Example: "def2-sv(p)"
    calctype : string
        What kind of Psi4 calculation to run. Supported inputs are:
        'opt' for geometry optimization,
        'spe' for single point energy calculation, and
        'hess' for Hessian calculation.
        Any other value terminates the program via sys.exit.
    mem : string
        How much memory each Psi4 job should take. If not specified, the
        default in Psi4 is 500 Mb. Examples: "2000 MB" "1.5 GB"
        http://www.psicode.org/psi4manual/master/psithoninput.html

    Returns
    -------
    inputstring : string
        Contents of Psi4 input file to be written out

    """

    # check that specified calctype is valid
    if calctype not in {'opt', 'spe', 'hess'}:
        sys.exit("Specify a valid calculation type.")

    inputstring = ""

    # specify memory requirements, if defined
    if mem is not None:
        inputstring += "memory %s\n" % mem
    inputstring += ('molecule %s {\n' % label)

    # charge and multiplicity; multiplicity hardwired to singlet (usually is)
    netCharge = oechem.OENetCharge(mol)
    inputstring += ('  %s 1' % netCharge)

    # get atomic symbol and coordinates of each atom
    xyz = oechem.OEFloatArray(3)
    for atom in mol.GetAtoms():
        mol.GetCoords(atom, xyz)
        inputstring += ('\n  %s %10.4f %10.4f  %10.4f'
                        % (oechem.OEGetAtomicSymbol(atom.GetAtomicNum()),
                           xyz[0], xyz[1], xyz[2]))
    inputstring += ('\n  units angstrom\n}')

    # check if mol has a "freeze" tag
    for x in oechem.OEGetSDDataPairs(mol):
        if calctype == "opt" and "atoms to freeze" in x.GetTag():
            # tag value looks like "[i, j, k, l]": strip brackets and blanks
            # before splitting; the first four entries are frozen.
            stripped = x.GetValue().replace("[", "").replace("]", "")
            freeze_list = stripped.replace(" ", "").split(",")
            inputstring += (
                "\n\nfreeze_list = \"\"\"\n  {} xyz\n  {} xyz\n  {} "
                "xyz\n  {} xyz\n\"\"\"".format(freeze_list[0], freeze_list[1],
                                               freeze_list[2], freeze_list[3]))
            inputstring += "\nset optking frozen_cartesian $freeze_list"
            inputstring += (
                "\nset optking dynamic_level = 1\nset optking "
                "consecutive_backsteps = 2\nset optking intrafrag_step_limit = "
                "0.1\nset optking interfrag_step_limit = 0.1\n")

    # best practices for scf calculations
    # http://www.psicode.org/psi4manual/master/scf.html#recommendations
    # http://www.psicode.org/psi4manual/master/dft.html#recommendations
    inputstring += '\n\nset scf_type df'
    inputstring += '\nset guess sad'

    # explicitly specify MP2 RI-auxiliary basis for [Ahlrichs] basis set
    # http://www.psicode.org/psi4manual/master/basissets_byfamily.html
    # DFMP2 *should* get MP2 aux sets fine for [Pople and Dunning] sets
    # http://www.psicode.org/psi4manual/master/dfmp2.html
    # Compare on the lower-cased name throughout so that e.g. "Def2-SV(P)"
    # is recognised as a def2 basis just like "def2-sv(p)".
    basis_lower = basisset.lower()
    if method.lower() == 'mp2' and 'def2' in basis_lower:
        if basis_lower == 'def2-sv(p)':
            inputstring += ('\nset df_basis_mp2 def2-sv_p_-ri')
        elif basis_lower != 'def2-qzvpd':  # no aux set for qzvpd 10-6-18
            inputstring += ('\nset df_basis_mp2 %s-ri' % (basisset))

    inputstring += ('\n\nset basis %s' % (basisset))
    inputstring += ('\nset freeze_core True')
    # specify command for type of calculation
    if calctype == 'opt':
        inputstring += ('\noptimize(\'%s\')\n\n' % (method))
    elif calctype == 'spe':
        inputstring += ('\nenergy(\'%s\')\n\n' % (method))
    elif calctype == 'hess':
        inputstring += (
            '\nH, wfn = hessian(\'%s\', return_wfn=True)\nwfn.hessian().print_out()\n\n'
            % (method))

    return inputstring
Пример #21
0
    def process(self, solvated_system, port):
        """
        Run a Yank binding experiment for one solvated ligand/complex pair.

        The ligand and complex structures and their serialized OpenMM
        systems are written to a temporary directory, a Yank experiment is
        built from ``yank_binding_template`` and run, and the analysed
        binding free energy and its error are stored on the extracted
        ligand as the SD tags 'DG_yank_binding' and 'dG_yank_binding'.
        The ligand is emitted on ``self.success``; on any exception the
        traceback is logged and ``solvated_system[1]`` is emitted on
        ``self.failure`` with the message stored under 'error'.

        Parameters
        ----------
        solvated_system : indexable
            Element 0 is used as the solvated ligand and element 1 as the
            solvated complex.
        port :
            Not used by this method.

        """

        try:
            opt = dict(self.opt)

            # Extract the solvated ligand and the solvated complex
            solvated_ligand = solvated_system[0]
            solvated_complex = solvated_system[1]

            # Update cube simulation parameters with the eventually molecule SD tags
            new_args = {dp.GetTag(): dp.GetValue() for dp in oechem.OEGetSDDataPairs(solvated_ligand) if dp.GetTag() in
                        ["temperature", "pressure"]}
            if new_args:
                for k in new_args:
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # Best effort: a non-numeric tag value is passed
                        # through unchanged rather than aborting the run.
                        pass
                self.log.info("Updating parameters for molecule: {}\n{}".format(solvated_ligand.GetTitle(), new_args))
                opt.update(new_args)

            # Extract the MD data
            mdData_ligand = data_utils.MDData(solvated_ligand)
            solvated_ligand_structure = mdData_ligand.structure

            mdData_complex = data_utils.MDData(solvated_complex)
            solvated_complex_structure = mdData_complex.structure

            # Create the solvated OpenMM systems
            solvated_complex_omm_sys = solvated_complex_structure.createSystem(nonbondedMethod=app.PME,
                                                                               nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                                               constraints=app.HBonds,
                                                                               removeCMMotion=False)

            solvated_ligand_omm_sys = solvated_ligand_structure.createSystem(nonbondedMethod=app.PME,
                                                                             nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                                             constraints=app.HBonds,
                                                                             removeCMMotion=False)

            # Write out all the required files and set-run the Yank experiment
            with TemporaryDirectory() as output_directory:

                opt['Logger'].info("Output Directory {}".format(output_directory))

                solvated_complex_structure_fn = os.path.join(output_directory, "complex.pdb")
                solvated_complex_structure.save(solvated_complex_structure_fn, overwrite=True)

                solvated_ligand_structure_fn = os.path.join(output_directory, "solvent.pdb")
                solvated_ligand_structure.save(solvated_ligand_structure_fn, overwrite=True)

                # Context managers guarantee the XML files are flushed and
                # closed before Yank reads them, even if write() raises.
                solvated_complex_omm_serialized = XmlSerializer.serialize(solvated_complex_omm_sys)
                solvated_complex_omm_serialized_fn = os.path.join(output_directory, "complex.xml")
                with open(solvated_complex_omm_serialized_fn, 'w') as solvated_complex_f:
                    solvated_complex_f.write(solvated_complex_omm_serialized)

                solvated_ligand_omm_serialized = XmlSerializer.serialize(solvated_ligand_omm_sys)
                solvated_ligand_omm_serialized_fn = os.path.join(output_directory, "solvent.xml")
                with open(solvated_ligand_omm_serialized_fn, 'w') as solvated_ligand_f:
                    solvated_ligand_f.write(solvated_ligand_omm_serialized)

                # Build the Yank Experiment
                yaml_builder = ExperimentBuilder(yank_binding_template.format(
                    verbose='yes' if opt['verbose'] else 'no',
                    minimize='yes' if opt['minimize'] else 'no',
                    output_directory=output_directory,
                    timestep=opt['timestep'],
                    nsteps_per_iteration=opt['nsteps_per_iteration'],
                    number_iterations=opt['iterations'],
                    temperature=opt['temperature'],
                    pressure=opt['pressure'],
                    complex_pdb_fn=solvated_complex_structure_fn,
                    complex_xml_fn=solvated_complex_omm_serialized_fn,
                    solvent_pdb_fn=solvated_ligand_structure_fn,
                    solvent_xml_fn=solvated_ligand_omm_serialized_fn,
                    restraints=opt['restraints'],
                    ligand_resname=opt['ligand_resname']))

                # Run Yank
                yaml_builder.run_experiments()

                exp_dir = os.path.join(output_directory, "experiments")

                # Binding free energy, enthalpy and their errors
                DeltaG_binding, dDeltaG_binding, DeltaH, dDeltaH = yankutils.analyze_directory(exp_dir)

                protein, ligand, water, excipients = oeommutils.split(solvated_ligand,
                                                                      ligand_res_name=opt['ligand_resname'])
                # Add result to the extracted ligand in kcal/mol
                oechem.OESetSDData(ligand, 'DG_yank_binding', str(DeltaG_binding))
                oechem.OESetSDData(ligand, 'dG_yank_binding', str(dDeltaG_binding))

            self.success.emit(ligand)

        except Exception as e:
            # Attach an error message to the molecule that failed
            self.log.error(traceback.format_exc())
            solvated_system[1].SetData('error', str(e))
            # Return failed mol
            self.failure.emit(solvated_system[1])

        return
Пример #22
0
 def has_dist(mol):
     """Return True if any SD tag attached to ``mol`` contains 'dist'."""
     return any('dist' in pair.GetTag()
                for pair in oechem.OEGetSDDataPairs(mol))
Пример #23
0
def generate_fragalysis(
    series: CompoundSeriesAnalysis,
    fragalysis_config: FragalysisConfig,
    results_path: str,
) -> None:
    """
    Generate input and upload to fragalysis from fragalysis_config

    The ligand SDF found under ``results_path`` is copied into a
    ``fragalysis_upload`` sub-directory, every molecule is stripped down to
    the SD tags Fragalysis expects, a leading "ver_1.2" header molecule
    carrying the field descriptions and submitter details is inserted, the
    SDF is rewritten and finally submitted with ``update_cset``.

    Fragalysis spec: https://discuss.postera.ai/t/providing-computed-poses-for-others-to-look-at/1155/8?u=johnchodera

    Parameters
    ----------
    series : CompoundSeriesAnalysis
        Analysis results (not read by this function)
    fragalysis_config : FragalysisConfig
        Fragalysis input parameters
    results_path : str
        The path to the results
    """

    import os
    from datetime import datetime
    from shutil import copyfile

    from openeye import oechem
    from rich.progress import track

    # make a directory to store fragalysis upload data
    fa_path = os.path.join(results_path, "fragalysis_upload")
    os.makedirs(fa_path, exist_ok=True)

    # "references.zip" selects per-ligand reference PDBs bundled in a zip
    uses_reference_zip = fragalysis_config.ref_pdb == "references.zip"

    # set paths
    ligands_path = os.path.join(results_path,
                                fragalysis_config.ligands_filename)
    fa_ligands_path = os.path.join(fa_path,
                                   fragalysis_config.fragalysis_sdf_filename)

    # copy sprint generated sdf to new name for fragalysis input
    copyfile(ligands_path, fa_ligands_path)

    # Read ligand poses
    molecules = []

    with oechem.oemolistream(ligands_path) as ifs:
        oemol = oechem.OEGraphMol()
        while oechem.OEReadMolecule(ifs, oemol):
            molecules.append(oemol.CreateCopy())
    print(f"{len(molecules)} ligands read")

    # Get zipped PDB if specified
    if uses_reference_zip:
        consolidate_protein_snapshots_into_pdb(
            oemols=molecules,
            results_path=results_path,
            pdb_filename="references.pdb",
            fragalysis_input=True,
            fragalysis_path=fa_path,
        )

    descriptions = {
        "DDG (kcal/mol)":
        "Relative computed free energy difference",
        "dDDG (kcal/mol)":
        "Uncertainty in computed relative free energy difference",
        "ref_mols":
        "a comma separated list of the fragments that inspired the design of the new molecule (codes as they appear in fragalysis - e.g. x0104_0,x0692_0)",
        "ref_pdb":
        "The name of the fragment (and corresponding Mpro fragment structure) with the best scoring hybrid docking pose",
        "original SMILES":
        "the original SMILES of the compound before any computation was carried out",
    }

    # Preprocess molecules
    tags_to_retain = {"DDG (kcal/mol)", "dDDG (kcal/mol)"}
    for index, oemol in enumerate(
            track(molecules, "Preprocessing molecules for Fragalysis...")):
        # Remove hydrogens
        oechem.OESuppressHydrogens(oemol, True)
        # Get original SMILES (read before the tag is deleted below)
        original_smiles = oechem.OEGetSDData(oemol, "SMILES")
        # Remove irrelevant SD tags
        for sdpair in oechem.OEGetSDDataPairs(oemol):
            tag = sdpair.GetTag()
            if tag not in tags_to_retain:
                oechem.OEDeleteSDData(oemol, tag)
        # Add required SD tags
        oechem.OESetSDData(oemol, "ref_mols", fragalysis_config.ref_mols)

        # If ref_pdb is zip file, point at this ligand's own reference PDB
        if uses_reference_zip:
            oechem.OESetSDData(oemol, "ref_pdb",
                               f"references/references_{index}.pdb")
        else:
            oechem.OESetSDData(oemol, "ref_pdb", fragalysis_config.ref_pdb)

        oechem.OESetSDData(oemol, "original SMILES", original_smiles)

    # Add initial blank (header) molecule, built from a copy of the first
    # ligand so that it carries the same set of SD fields
    oemol = molecules[0].CreateCopy()
    # Replace each SD field value with its description
    for sdpair in oechem.OEGetSDDataPairs(oemol):
        tag = sdpair.GetTag()
        oechem.OESetSDData(oemol, tag, descriptions[tag])

    # Add other fields
    oemol.SetTitle("ver_1.2")
    oechem.OESetSDData(oemol, "ref_url", fragalysis_config.ref_url)
    oechem.OESetSDData(oemol, "submitter_name",
                       fragalysis_config.submitter_name)
    oechem.OESetSDData(oemol, "submitter_email",
                       fragalysis_config.submitter_email)
    oechem.OESetSDData(oemol, "submitter_institution",
                       fragalysis_config.submitter_institution)
    oechem.OESetSDData(oemol, "generation_date",
                       datetime.today().strftime("%Y-%m-%d"))
    oechem.OESetSDData(oemol, "method", fragalysis_config.method)
    molecules.insert(0, oemol)  # make it first molecule

    # Write molecules, header first
    with oechem.oemolostream(fa_ligands_path) as ofs:
        for oemol in track(molecules,
                           description="Writing Fragalysis SDF file..."):
            oechem.OEWriteMolecule(ofs, oemol)

    # TODO add check SDF step here?

    # Upload to fragalysis
    print("Uploading to Fragalysis...")
    print(f"--> Target: {fragalysis_config.target_name}")

    # Imported late so the dependency is only needed once an upload happens
    from fragalysis_api.xcextracter.computed_set_update import update_cset, REQ_URL

    if fragalysis_config.new_upload:
        update_set = "None"  # new upload
        print("--> Uploading a new set")
    else:
        # set name is "<submitter>-<method>" with all whitespace removed
        update_set = ("".join(fragalysis_config.submitter_name.split()) + "-" +
                      "".join(fragalysis_config.method.split()))

        print(f"--> Updating set: {update_set}")

    if uses_reference_zip:
        pdb_zip_path = os.path.join(fa_path, "references.zip")
    else:
        pdb_zip_path = None

    taskurl = update_cset(
        REQ_URL,
        target_name=fragalysis_config.target_name,
        sdf_path=fa_ligands_path,
        pdb_zip_path=pdb_zip_path,
        update_set=update_set,
        upload_key=fragalysis_config.upload_key,
        submit_choice=1,
        add=False,
    )

    print(f"Upload complete, check upload status: {taskurl}")
Пример #24
0
def get_sd_list(mol, datum, Package='Psi4', Method=None, Basisset=None):
    """
    Get list of specified SD tag for all confs in mol.

    Parameters
    ----------
    mol:        OEChem molecule with all of its conformers
    datum:       string description of property of interest
        options implemented: "QM opt energy", "QM opt energy scs",
        "QM opt energy initial", "QM spe", "QM spe scs", "MM opt energy",
        "original index", "opt runtime", "spe runtime", "opt step".
        Any other value terminates the program via sys.exit.
    Package:    software package used for QM calculation. Psi4 or Turbomole.
    Method:     string, for specific properties. e.g. 'mp2'
    Basisset:   string, for specific properties. e.g. '6-31+G(d)'

    Returns
    -------
    sdlist: A 1D list with the property from the SD tag, one entry per
        conformer that carries a matching tag ('nan' for a conformer whose
        "note on opt." tag reports that it did not finish). A conformer
        with neither tag contributes no entry.
    """

    # level of theory suffix shared by all QM tag labels
    level = "%s/%s" % (Method, Basisset)
    taglabels = {
        "QM opt energy":
        "QM %s Final Opt. Energy (Har) %s" % (Package, level),
        "QM opt energy scs":
        "QM %s Final Opt. Energy (Har) SCS-%s" % (Package, level),
        "QM opt energy initial":
        "QM %s Initial Opt. Energy (Har) %s" % (Package, level),
        "QM spe":
        "QM %s Single Pt. Energy (Har) %s" % (Package, level),
        "QM spe scs":
        "QM %s Single Pt. Energy (Har) SCS-%s" % (Package, level),
        "MM opt energy":
        "MM Szybki Newton Energy",
        "original index":
        "Original omega conformer number",
        "opt runtime":
        "QM %s Opt. Runtime (sec) %s" % (Package, level),
        "spe runtime":
        "QM %s Single Pt. Runtime (sec) %s" % (Package, level),
        "opt step":
        "QM %s Opt. Steps %s" % (Package, level),
    }

    taglabel = taglabels.get(datum)
    if taglabel is None:
        sys.exit("Error in input tag of extracting SD data.")

    # tag matching is case-insensitive; lower-case the label once up front
    wanted = taglabel.lower()

    SDList = []
    for conf in mol.GetConfs():
        for x in oechem.OEGetSDDataPairs(conf):
            tag = x.GetTag().lower()
            # Case: opt did not finish --> append nan
            if "note on opt." in tag and "did not finish" in x.GetValue().lower():
                SDList.append('nan')
                break
            # Case: want energy value OR want original index number
            elif wanted in tag:
                SDList.append(x.GetValue())
                break
    return SDList
Пример #25
0
from openeye import oegraphsim

# @ <SNIPPET-SDF2FP>
# Expect exactly one command-line argument: the input SDF file.
if len(sys.argv) != 2:
    oechem.OEThrow.Usage("{} <infile>".format(sys.argv[0]))

ifs = oechem.oemolistream()
if not ifs.open(sys.argv[1]):
    oechem.OEThrow.Fatal(
        "Unable to open {} for reading".format(sys.argv[1]))
if ifs.GetFormat() != oechem.OEFormat_SDF:
    oechem.OEThrow.Fatal(
        "{} input file has to be an SDF file".format(sys.argv[1]))

# Count molecules, and SD fields whose tag is a valid fingerprint type string.
molcounter = fpcounter = 0
for mol in ifs.GetOEGraphMols():
    molcounter += 1
    for dp in oechem.OEGetSDDataPairs(mol):
        if not oegraphsim.OEIsValidFPTypeString(dp.GetTag()):
            continue
        fpcounter += 1
        fptypestr = dp.GetTag()
        fphexdata = dp.GetValue()

        # Rebuild the fingerprint from its type string and hex payload.
        fp = oegraphsim.OEFingerPrint()
        fptype = oegraphsim.OEGetFPType(fptypestr)
        fp.SetFPTypeBase(fptype)
        fp.FromHexString(fphexdata)

print("Number of molecules = {:d}".format(molcounter))
print("Number of fingerprints = {:d}".format(fpcounter))
# @ </SNIPPET-SDF2FP>
def DumpSDData(mol):
    """Print the title of ``mol`` followed by each of its SD tag/value pairs."""
    print("SD data of", mol.GetTitle())
    # one "tag : value" line per SD data pair, then a blank separator line
    for pair in oechem.OEGetSDDataPairs(mol):
        tag, value = pair.GetTag(), pair.GetValue()
        print(tag, ':', value)
    print()
Пример #27
0
def KeepProps(proplist, ifs, ofs):
    """
    Copy molecules from ``ifs`` to ``ofs``, keeping only listed SD tags.

    Every SD data entry whose tag is not in ``proplist`` is deleted from
    the molecule before it is written out.
    """
    for mol in ifs.GetOEGraphMols():
        for pair in oechem.OEGetSDDataPairs(mol):
            tag = pair.GetTag()
            if tag in proplist:
                continue
            oechem.OEDeleteSDData(mol, tag)
        oechem.OEWriteMolecule(ofs, mol)