Пример #1
0
class AxesOfInertia(ObjectiveProvider):

    """
    Objective scoring how well the axes of inertia of several molecules
    align with those of a reference molecule.

    Parameters
    ----------
    reference : str
        Name of the molecule the `targets` should align to.
    targets : list of str
        Names of the molecules to be aligned to `reference`.
    threshold : float
        Target average of the cosine of the alignment angle between
        targets and reference. Validated to lie within [0, 1].
    only_primaries : bool
        Consider only the largest inertia vectors.
        NOTE(review): this option is accepted and validated, but nothing
        in this class stores or reads it -- confirm whether it should be
        forwarded to the alignment computation.

    Returns
    -------
    float
        Absolute difference between `threshold` and the mean of all the
        cosines yielded by ``calculate_alignment``.
    """
    _validate = {
        parse.Required('reference'): parse.Molecule_name,
        parse.Required('targets'): [parse.Molecule_name],
        'threshold': parse.All(parse.Coerce(float), parse.Range(min=0, max=1)),
        'only_primaries': parse.Coerce(bool),
        }

    def __init__(self, reference=None, targets=None, only_primaries=False,
                 threshold=0.84, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._reference = reference
        self._targets = targets
        self.threshold = threshold

    def reference(self, individual):
        """
        Return the Chimera molecule of the reference gene found in
        `individual`. Usually, the biggest molecule in size.
        """
        gene = individual.find_molecule(self._reference)
        return gene.compound.mol

    def targets(self, individual):
        """
        Return the Chimera molecules of every target gene found in
        `individual`, in the order they were configured.
        """
        molecules = []
        for name in self._targets:
            molecules.append(individual.find_molecule(name).compound.mol)
        return molecules

    def evaluate(self, individual):
        """
        Compute the axes of inertia of the reference and each target and
        return how far the mean alignment cosine is from `threshold`.
        """
        # The reference goes first so it can be split off from the targets
        # after the per-molecule axes have been calculated.
        molecules = [self.reference(individual)] + self.targets(individual)
        axes_per_molecule = [calculate_axes_of_inertia(m) for m in molecules]

        reference_axes = axes_per_molecule[0]
        targets_axes = axes_per_molecule[1:]
        cosines = list(calculate_alignment(reference_axes, *targets_axes))
        return abs(self.threshold - np.mean(cosines))
Пример #2
0
class Angle(ObjectiveProvider):
    """
    Objective measuring how far the angle (or dihedral) defined by a set of
    atoms is from a requested value.

    Parameters
    ----------
    threshold : float or 'planar'
        Optimum angle. The literal string ``'planar'`` is also accepted; in
        that case the absolute sine of the measured angle is returned.
    probes : list of str
        Atoms that make the angle, expressed as a series of
        <molecule_name>/<serial_number> strings

    Returns
    -------
    float
        Deviation from threshold angle, in degrees. With
        ``threshold='planar'``, ``abs(sin(angle))`` instead.
    """

    _validate = {
        parse.Required('probes'):
        parse.AssertList(parse.Named_spec("molecule", "atom")),
        parse.Required('threshold'):
        parse.Any(parse.Coerce(float), parse.In(['planar']))
    }

    def __init__(self, threshold=None, probes=None, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._probes = probes
        self.threshold = threshold

    def probes(self, ind):
        """
        Yield, in configuration order, every atom of `ind` matched by the
        configured (molecule, serial) probe specifications.
        """
        for molecule_name, serial in self._probes:
            molecule_gene = ind.find_molecule(molecule_name)
            for atom in molecule_gene.find_atoms(serial):
                yield atom

    def evaluate(self, ind):
        """
        Measure the angle defined by the probe atoms of `ind` and return
        its deviation from the configured threshold.
        """
        coordinates = [atom.xformCoord() for atom in self.probes(ind)]
        try:
            measured = chimera.angle(*coordinates)
        except TypeError:  # four atoms, means dihedral
            measured = chimera.dihedral(*coordinates)

        if self.threshold != 'planar':
            return abs(self.threshold - measured.real)
        return abs(math.sin(math.radians(measured)))
Пример #3
0
class Molecule(GeneProvider):
    """
    Interface around the :class:`gaudi.genes.molecule.Compound` to handle
    the GAUDI protocol and caching features.

    Parameters
    ----------
    path : str, optional
        Path to a molecule file or a directory containing dirs of molecule files.

    symmetry : str, optional
        If `path` is a directory, list of pairs of directories whose chosen
        molecule must be the same, thus enabling *symmetry*.

    hydrogens : bool, optional
        Add hydrogens to Molecule (True) or not (False).

    pdbfix : bool, optional
        Only for testing and debugging. Better run pdbfixer prior to GAUDI.
        Fix potential issues that may cause troubles with OpenMM forcefields.

    vdw_radii : dict {str: float} , optional
        Set a specific vdw_radius for a particular element (instead of standard
        Chimera VdW table). It can be useful in particular cases together with
        a contacts objective. Example of use in the .yaml file:
        vdw_radii: {
            Fe: 2.00,
            Cu: 2.16}
        Defaults to None

    Attributes
    ----------
    allele : tuple of str
        Paths to every fragment that composes a given Compound. It will consist
        of a single value tuple if there's no dynamic building involved; ie,
        a mol2 or pdb file as is.

    _CATALOG : dict
        Class attribute (shared among all `Molecule` instances) that holds
        all the possible molecules GAUDI can build given current `path`.

        If `path` is a single molecule file, that's the only possibility, but
        if it's set to a directory, the engine can potentially build all the
        combinations of molecule blocks found in those subdirectories.

        Normally, it is accessed via the `catalog` attribute.

    Notes
    -----
    **Use of `_cache`**

    `Molecule` class uses `_cache` to store already built molecules. Its entry in
    the `_cache` dictionary is a `boltons.cacheutils.LRU` cache (least recently used)
    set to a maximum size of 300 entries.

    .. todo ::

        The LRU cache size should be proportional to job size, depending on
        the population size and number of generations, but also taking available
        memory into account (?).

    """

    _validate = {
        parse.Required('path'): parse.RelPathToInputFile(),
        'symmetry': [[basestring]],
        'hydrogens': parse.Boolean,
        'pdbfix': parse.Boolean,
        'vdw_radii': {
            basestring: float
        }
    }

    _CATALOG = {}
    SUPPORTED_FILETYPES = ('mol2', 'pdb')

    def __init__(self,
                 path=None,
                 symmetry=None,
                 hydrogens=False,
                 pdbfix=False,
                 vdw_radii=None,
                 **kwargs):
        GeneProvider.__init__(self, **kwargs)
        # Extra keyword arguments are kept so that __deepcopy__ can build an
        # equivalent gene later on.
        self._kwargs = kwargs
        self.path = path
        self.symmetry = symmetry
        self.hydrogens = hydrogens
        self.pdbfix = pdbfix
        self.vdw_radii = vdw_radii
        # The catalog is compiled only once per gene name and then shared
        # through the class-level _CATALOG dict.
        try:
            self.catalog = self._CATALOG[self.name]
        except KeyError:
            self.catalog = self._CATALOG[self.name] = tuple(
                self._compile_catalog())
        # Per-gene-name LRU caches stored in `self._cache`
        # (presumably provided by GeneProvider -- confirm).
        self._compounds_cache = self._cache.setdefault(
            self.name + '_compounds', LRU(300))
        self._atomlookup_cache = self._cache.setdefault(
            self.name + '_atomlookup', LRU(300))
        self._residuelookup_cache = self._cache.setdefault(
            self.name + '_residuelookup', LRU(300))
        self.allele = random.choice(self.catalog)

        # An optimization for similarity methods: xform coords are
        # cached here after all genes have expressed. See Individual.express.
        self._expressed_coordinates = None

    def __deepcopy__(self, memo):
        """
        Build a new gene with the same configuration and allele.

        Compounds themselves are not copied: they are shared through the
        caches set up in ``__init__``.
        """
        new = self.__class__(path=self.path,
                             symmetry=self.symmetry,
                             hydrogens=self.hydrogens,
                             pdbfix=self.pdbfix,
                             # Without this, copies would silently fall back
                             # to the default radii when building compounds.
                             vdw_radii=self.vdw_radii,
                             **self._kwargs)
        new.allele = self.allele + ()
        new._expressed_coordinates = self._expressed_coordinates
        return new

    @property
    def compound(self):
        """
        Get expressed allele on-demand (read-only attribute)
        """
        return self.get(self.allele)

    def express(self):
        """
        Adds Chimera molecule object to the viewer canvas.

        It also converts pseudobonds (used by Chimera to depict
        coordinated ligands to metals) to regular bonds.
        """
        chimera.openModels.add([self.compound.mol], shareXform=True)
        box.pseudobond_to_bond(self.compound.mol)

    def unexpress(self):
        """
        Removes the Chimera molecule from the viewer canvas
        (without deleting the object).
        """
        chimera.openModels.remove([self.compound.mol])

    def mate(self, mate):
        """
        Exchange molecule blocks with `mate` through a two-point crossover.

        Nothing happens for single-block alleles or when symmetry is
        requested. If the crossover itself fails, the two alleles are
        simply swapped.

        .. todo::

            Allow mating while preserving symmetry
        """
        if len(self.allele) == 1 or self.symmetry:
            return
        try:
            self.allele, mate.allele = deap.tools.cxTwoPoint(
                list(self.allele), list(mate.allele))
        except (StopIteration, ValueError):
            self.allele, mate.allele = mate.allele, self.allele
        else:
            # cxTwoPoint was given lists; alleles must be hashable tuples
            # because they are used as cache keys.
            self.allele = tuple(self.allele)
            mate.allele = tuple(mate.allele)

    def mutate(self, indpb):
        """
        VERY primitive. It only gets another compound.

        Note that the `indpb` argument is not consulted; the probability
        is read from ``self.indpb`` instead.
        """
        if random.random() < self.indpb:
            self.allele = random.choice(self.catalog)

    def write(self,
              path=None,
              name=None,
              absolute=None,
              combined_with=None,
              filetype='mol2'):
        """
        Writes the expressed molecule to disk.

        Parameters
        ----------
        path, name : str, optional
            If both are given, the file is written to
            ``<path>/<name>_<gene name>.<filetype>``.
        absolute : str, optional
            Full output path, used when `path` and `name` are not both set.
        combined_with : iterable, optional
            Other objects exposing ``.compound.mol`` whose molecules are
            written to the same file.
        filetype : str, optional
            Either 'mol2' (default) or 'pdb'.

        Returns
        -------
        str
            Path of the written file. A temporary file is used if no output
            location was provided.

        Raises
        ------
        ValueError
            If `filetype` is neither 'mol2' nor 'pdb'.

        .. todo::

            It'd be preferable to get a string instead of a file
        """
        if path and name:
            fullname = os.path.join(
                path, '{}_{}.{}'.format(name, self.name, filetype))
        elif absolute:
            fullname = absolute
        else:
            handle, fullname = tempfile.mkstemp(
                prefix='gaudi', suffix='.{}'.format(filetype))
            # mkstemp hands back an already-open OS-level descriptor. Only
            # the path is needed here, so close it right away instead of
            # leaking one descriptor per call.
            os.close(handle)
            logger.warning("No output path provided. Using tempfile %s.",
                           fullname)

        molecules = [self.compound.mol]
        if combined_with:
            molecules.extend(ind.compound.mol for ind in combined_with)
        if filetype == 'mol2':
            writeMol2(molecules,
                      fullname,
                      temporary=True,
                      multimodelHandling='combined')
        elif filetype == 'pdb':
            chimera.pdbWrite(molecules, self.compound.mol.openState.xform,
                             fullname)
        else:
            raise ValueError(
                'Filetype {} not recognized. Try with mol2 or pdb'.format(
                    filetype))
        return fullname

    @classmethod
    def clear_cache(cls):
        """
        Clear the GeneProvider cache and the shared molecule catalog.
        """
        GeneProvider.clear_cache()
        cls._CATALOG.clear()

    ############
    def __getitem__(self, key):
        """Implements dict-like item retrieval"""
        return self.get(key)

    def get(self, key):
        """
        Looks for the compound corresponding to `key` in `_cache`. If found,
        return it. Else, build it on demand, store it on cache and return it.

        Parameters
        ----------
        key : tuple of str
            Path (or combination of) to the requested molecule. It should be
            extracted from `catalog`.

        Returns
        -------
        gaudi.genes.molecule.Compound
            The result of building the requested molecule.

        """
        try:
            return self._compounds_cache[key]
        except KeyError:
            self._compounds_cache[key] = compound = self.build(key)
            return compound

    def build(self, key, where=None):
        """
        Builds a `Compound` following the recipe contained in `key` through
        `Compound.append` methods.

        Parameters
        ----------
        key : tuple of str
            Paths to the molecule blocks that comprise the final molecule.
            A single molecule is just a one-block recipe.
        where : optional
            Not used by this implementation.

        Returns
        -------
        Compound
            The final molecule result of the sequential appending, after
            the optional hydrogen addition, pdbfix and VdW radii steps.
        """
        base = Compound(molecule=key[0])
        for molpath in key[1:]:
            base.append(Compound(molecule=molpath))
        if self.hydrogens:
            base.add_hydrogens()
        if self.pdbfix:
            base.apply_pdbfix()
        if self.vdw_radii:
            base.set_vdw_radii(self.vdw_radii)

        return base

    def _compile_catalog(self):
        """
        Computes all the possible combinations of given directories and
        molecule files, taking symmetry requirements into account.

        Parameters
        ----------
        self.path
        self.symmetry

        Returns
        -------
        set
            A set of tuples of paths, each indicating a build recipe for a
            molecule. It is empty if `path` is neither a directory nor a
            file with a supported extension.
        """
        container = set()
        if os.path.isdir(self.path):
            # Subdirectories starting with '.' or '_' are ignored.
            folders = sorted(
                os.path.join(self.path, d) for d in os.listdir(self.path)
                if os.path.isdir(os.path.join(self.path, d))
                and not d.startswith('.') and not d.startswith('_'))
            if folders:
                # One block is picked from each folder, in sorted order.
                catalog = itertools.product(*[
                    box.files_in(f, ext=self.SUPPORTED_FILETYPES)
                    for f in folders
                ])
                if isinstance(self.symmetry, list):
                    folder_names = [
                        os.path.basename(os.path.normpath(f)) for f in folders
                    ]
                    for entry in catalog:
                        # Keep only recipes whose symmetric folders
                        # contribute a file with the same base name.
                        if all(os.path.basename(entry[folder_names.index(s1)])
                               == os.path.basename(
                                   entry[folder_names.index(s2)])
                               for (s1, s2) in self.symmetry):
                            container.add(entry)
                else:
                    container.update(catalog)
            else:
                # Flat directory: every file is a one-block recipe.
                container.update(
                    (f, ) for f in box.files_in(self.path,
                                                ext=self.SUPPORTED_FILETYPES))
        elif (self.path.split('.')[-1] in self.SUPPORTED_FILETYPES):
            container.add((self.path, ))
        return container

    # API methods
    def find_atoms(self, serial, only_one=False):
        """
        Return the atoms of the current compound matching `serial`.

        Parameters
        ----------
        serial : int or str
            Serial number, atom name (or `Compound` attribute name), or
            ``'*'`` to get all the atoms in the molecule.
        only_one : bool, optional
            Raise `TooManyAtoms` if more than one atom matches.

        Results are cached per (allele, serial).
        NOTE(review): a cached result is returned without re-checking
        `only_one` -- confirm this is intended.
        """
        if serial == '*':
            return self.compound.mol.atoms
        try:
            return self._atomlookup_cache[(self.allele, serial)]
        except KeyError:
            atoms = self._find_atoms(serial, only_one)
            self._atomlookup_cache[(self.allele, serial)] = atoms
            return atoms

    def _find_atoms(self, serial, only_one=False):
        """
        Uncached atom lookup behind `find_atoms`.

        Raises
        ------
        ValueError
            If `serial` is neither an int nor a string.
        TooManyAtoms
            If `only_one` is set and several atoms match.
        AtomsNotFound
            If no atom matches.
        """
        if isinstance(serial, int):  # search by serial number
            atoms = [
                a for a in self.compound.mol.atoms if a.serialNumber == serial
            ]
        elif isinstance(serial, basestring):  # search by name
            try:
                # Attributes of the Compound take precedence over atom names
                atoms = [getattr(self.compound, serial)]
            except AttributeError:
                atoms = [
                    a for a in self.compound.mol.atoms if a.name == serial
                ]
        else:
            raise ValueError('Serial {} not valid'.format(serial))
        if atoms:
            if only_one and len(atoms) > 1:
                raise TooManyAtoms(
                    "Found {} atoms for serial {} but expected 1.".format(
                        len(atoms), serial))
            return atoms
        raise AtomsNotFound("Atom '{}' not found in {}".format(
            serial, self.name))

    def find_atom(self, serial):
        """
        Return the single atom matching `serial`. See `find_atoms`.
        """
        return self.find_atoms(serial, only_one=True)[0]

    def find_residues(self, position, only_one=False):
        """
        Return the residues of the current compound located at `position`,
        or all of them if `position` is ``'*'``.

        Results are cached per (allele, position).
        NOTE(review): a cached result is returned without re-checking
        `only_one` -- confirm this is intended.
        """
        if position == '*':
            return self.compound.mol.residues
        try:
            return self._residuelookup_cache[(self.allele, position)]
        except KeyError:
            residues = self._find_residues(position, only_one)
            self._residuelookup_cache[(self.allele, position)] = residues
            return residues

    def _find_residues(self, position, only_one):
        """
        Uncached residue lookup behind `find_residues`.

        Raises
        ------
        TooManyResidues
            If `only_one` is set and several residues match.
        ResiduesNotFound
            If no residue matches.
        """
        residues = [
            r for r in self.compound.mol.residues if r.id.position == position
        ]
        if residues:
            if only_one and len(residues) > 1:
                raise TooManyResidues(
                    "Found {} residues for position {}".format(
                        len(residues), position))
            return residues
        raise ResiduesNotFound("Residue {} not found in {}".format(
            position, self.name))

    def find_residue(self, position):
        """
        Return the single residue at `position`. See `find_residues`.
        """
        return self.find_residues(position, only_one=True)[0]

    def xyz(self, transformed=True):
        """
        Return the coordinates of all the atoms in the current compound, as
        given by ``get_atom_coordinates``.
        """
        return get_atom_coordinates(self.compound.mol.atoms,
                                    transformed=transformed)
Пример #4
0
class Torsion(GeneProvider):

    """
    Gene that applies torsions to the rotatable bonds of a molecule.

    Parameters
    ----------
    target: str
        Name of gaudi.genes.molecule instance to perform rotation on
    flexibility : int or float
        Maximum number of degrees a bond can rotate. Values above 360
        are clamped to 360.
    max_bonds :
        Expected number of free rotations in molecule. Needed to store
        arbitrary rotations. If not set, it is computed from the number
        of rotatable bonds found.
    anchor : str
        Molecule/atom_serial_number of reference atom for torsions
    rotatable_atom_types : list of str
        Which type of atom types (as in chimera.Atom.idatmType) should rotate.
        Defaults to ('C3', 'N3', 'C2', 'N2', 'P').
    rotatable_atom_names : list of str
        Which type of atom names (as in chimera.Atom.name) should rotate.
        Defaults to ().
    rotatable_elements : list of str
        Which elements (compared against ``atom.element.name``) should rotate.
        Defaults to ().
    non_rotatable_bonds : list of [Molecule/atom, Molecule/atom]
        Pairs of atoms whose bond must never rotate. Only taken into
        account when `rotatable_bonds` is not set.
    rotatable_bonds : list of [SerialNumberAtom1, SerialNumberAtom2, SerialNumberAnchor]
        Concrete bonds that are allowed to rotate. Atoms have to be
        designated using their chimera serial number. IMPORTANT: if set,
        these will be the ONLY bonds allowed to rotate, ignoring
        other possible conditions (e.g. rotatable_atom_types,
        rotatable_atom_names...).
    precision : int
        Number of decimals the torsion angles are rounded to (passed to
        ``round``). Validated to lie within [-3, 3]. Defaults to 1.

    Attributes
    ----------
    allele : list of float
        For i rotatable bonds in molecule, it contains i floats which correspond
        to each torsion angle. Each falls within
        [-flexibility / 2, flexibility / 2].

    Notes
    -----

    .. todo ::

        `max_bonds` is now automatically computed, but probably won't deal
        nicely with block-built ligands.

    """

    _validate = {
        parse.Required("target"): parse.Molecule_name,
        "flexibility": parse.Degrees,
        "max_bonds": parse.All(parse.Coerce(int), parse.Range(min=0)),
        "anchor": parse.Named_spec("molecule", "atom"),
        "rotatable_atom_types": [basestring],
        "rotatable_atom_names": [basestring],
        "rotatable_elements": [basestring],
        "non_rotatable_bonds": [
            parse.All(
                [parse.Named_spec("molecule", "atom")], parse.Length(min=2, max=2)
            )
        ],
        "rotatable_bonds": [parse.All([parse.Coerce(int)], parse.Length(min=3, max=3))],
        "precision": parse.All(parse.Coerce(int), parse.Range(min=-3, max=3)),
    }

    BONDS_ROTS = {}

    def __init__(
        self,
        target=None,
        flexibility=360.0,
        max_bonds=None,
        anchor=None,
        rotatable_atom_types=("C3", "N3", "C2", "N2", "P"),
        rotatable_atom_names=(),
        rotatable_elements=(),
        non_rotatable_bonds=(),
        rotatable_bonds=(),
        precision=1,
        **kwargs
    ):
        GeneProvider.__init__(self, **kwargs)
        self._kwargs = kwargs
        self.target = target
        self.flexibility = 360.0 if flexibility > 360 else flexibility
        self.max_bonds = max_bonds
        self.rotatable_atom_types = rotatable_atom_types
        self.rotatable_atom_names = rotatable_atom_names
        self.rotatable_elements = rotatable_elements
        self.non_rotatable_bonds = non_rotatable_bonds
        # Stored under a different name because `rotatable_bonds` is the
        # property that returns the actual BondRot objects.
        self.concrete_rotatable_bonds = rotatable_bonds
        self.precision = precision
        self._anchor = anchor
        # Provisional allele; __expression_hooks__ regenerates it with
        # `max_bonds` angles.
        self.allele = [self.random_angle() for i in xrange(50)]

    def __expression_hooks__(self):
        """
        Compute `max_bonds` from the molecule if it was not provided and
        regenerate the allele with that many random angles.
        """
        if self.max_bonds is None:
            self.max_bonds = len(self.rotatable_bonds)
        self.allele = [self.random_angle() for i in xrange(self.max_bonds)]

    def express(self):
        """
        Apply rotations to rotatable bonds
        """
        for alpha, br in zip(self.allele, self.rotatable_bonds):
            try:
                # Bonds between C2/N2 atoms only get 0 or 180 degrees
                if all(a.idatmType in ("C2", "N2") for a in br.bond.atoms):
                    alpha = 0 if alpha <= 0 else 180
                br.adjustAngle(alpha - br.angle, br.rotanchor)
            # A null bondrot was returned -> non-rotatable bond
            except AttributeError:
                continue

    def unexpress(self):
        """
        Undo the rotations
        """
        for br in self.rotatable_bonds:
            br.adjustAngle(-br.angle, br.rotanchor)

    def mate(self, mate):
        """
        Recombine the torsion angles of both genes, in place, with a
        simulated binary bounded crossover limited to the flexibility range.
        """
        self_allele, mate_allele = cxSimulatedBinaryBounded(
            self.allele,
            mate.allele,
            eta=self.cxeta,
            low=-0.5 * self.flexibility,
            up=0.5 * self.flexibility,
        )
        self.allele[:] = [round(n, self.precision) for n in self_allele]
        mate.allele[:] = [round(n, self.precision) for n in mate_allele]

    def mutate(self, indpb):
        """
        Half of the time, apply a bounded polynomial mutation to the
        current angles; otherwise, replace them all with random angles.

        Note that the `indpb` argument is not consulted; ``self.indpb``
        is used instead.
        """
        if random.random() < 0.5:
            allele, = mutPolynomialBounded(
                self.allele,
                indpb=self.indpb,
                eta=self.mteta,
                low=-0.5 * self.flexibility,
                up=0.5 * self.flexibility,
            )
            self.allele[:] = [round(n, self.precision) for n in allele]
        else:
            self.allele = [self.random_angle() for i in xrange(self.max_bonds)]

    @classmethod
    def clear_cache(cls):
        """
        Clear the GeneProvider cache and the shared BONDS_ROTS dict.

        Declared as a classmethod so it can be called on either the class
        or an instance.
        """
        GeneProvider.clear_cache()
        cls.BONDS_ROTS.clear()

    #####
    @property
    def molecule(self):
        """
        Chimera molecule of the target gene in the parent individual.
        """
        return self.parent.find_molecule(self.target).compound.mol

    def random_angle(self):
        """
        Returns a random angle within flexibility limits
        """
        return round(
            random.uniform(-0.5 * self.flexibility, 0.5 * self.flexibility),
            self.precision,
        )

    @property
    def rotatable_bonds(self):
        """
        Gets potentially rotatable bonds in molecule

        The list is computed only once per molecule and then stored on the
        molecule object itself (``_rotatable_bonds``).

        Unless concrete `rotatable_bonds` were requested by the user, the
        bonds of the molecule are sorted by atom serial number and filtered:
        bonds listed as non rotatable are discarded, none of the atoms can be
        terminal (one bond only) or a metal, and at least one of them must
        match the rotatable atom types, names or elements (or be named DUM).

        If the bond is valid, get the BondRot object. Chimera will complain
        if we already have requested that bond previously, or if the bond is in a
        cycle. Handle those exceptions silently, and get the next bond in that case.

        If no exceptions are raised, then store the rotation anchor in the BondRot
        object (that's the nearest atom in the bond to the molecular anchor, or
        the anchor explicitly requested for a concrete bond).
        """
        try:
            return self.molecule._rotatable_bonds
        except AttributeError:
            self.molecule._rotatable_bonds = list(self._compute_rotatable_bonds())
            return self.molecule._rotatable_bonds

    @staticmethod
    def _request_bondrot(bond):
        """
        Return a chimera.BondRot for `bond`, or None if Chimera refuses
        because the bond is in a cycle or was already requested.
        """
        try:
            return chimera.BondRot(bond)
        except (chimera.error, ValueError) as v:
            if "cycle" in str(v) or "already used" in str(v):
                return None  # discard bonds in cycles and used!
            raise

    def _compute_rotatable_bonds(self):
        """
        Yield a BondRot, with its `rotanchor` attribute set, for every bond
        that is allowed to rotate. See `rotatable_bonds` for the criteria.
        """
        bonds = sorted(
            self.molecule.bonds, key=lambda b: min(y.serialNumber for y in b.atoms)
        )

        non_rotatable_bonds = []
        for atom_a, atom_b in self.non_rotatable_bonds:
            a = self.parent.find_molecule(atom_a.molecule).find_atom(atom_a.atom)
            b = self.parent.find_molecule(atom_b.molecule).find_atom(atom_b.atom)
            bond = a.findBond(b)
            if bond:
                non_rotatable_bonds.append(bond)
            else:
                logger.warning("Atoms {} and {} are not bonded!".format(a, b))

        if self.concrete_rotatable_bonds:
            # User-defined bonds override any other criteria
            target_gene = self.parent.find_molecule(self.target)
            for atom_a, atom_b, anchor in self.concrete_rotatable_bonds:
                a = target_gene.find_atom(atom_a)
                b = target_gene.find_atom(atom_b)
                an = target_gene.find_atom(anchor)
                bond = a.findBond(b)
                if not bond:
                    logger.warning("Atoms {} and {} are not bonded!".format(a, b))
                    continue
                br = self._request_bondrot(bond)
                if br is None:
                    continue
                br.rotanchor = an
                yield br
        else:

            def conditions(*atoms):
                # Must be satisfied by every atom: neither terminal
                # nor a metal
                if any(a.numBonds == 1 or a.element.isMetal for a in atoms):
                    return False
                # Must be satisfied by at least one atom
                return any(
                    a.name == "DUM"
                    or a.idatmType in self.rotatable_atom_types
                    or a.name in self.rotatable_atom_names
                    or a.element.name in self.rotatable_elements
                    for a in atoms
                )

            for b in bonds:
                if b in non_rotatable_bonds:
                    continue
                if not conditions(*b.atoms):
                    continue
                br = self._request_bondrot(b)
                if br is None:
                    continue
                br.rotanchor = box.find_nearest(self.anchor, b.atoms)
                yield br

    @property
    def anchor(self):
        """
        Get the molecular anchor. Ie, the *root* of the rotations, the fixed
        atom of the molecule.

        If an `anchor` was configured and can be found, that atom is used.
        Otherwise, get the nearest atom to the geometric center of the
        molecule, and if it's not possible, the ``donor`` atom of the molecule.

        The result is stored on the molecule object (``_rotation_anchor``)
        so it is only computed once.
        """
        try:
            return self.molecule._rotation_anchor
        except AttributeError:
            pass
        if self._anchor is not None:
            mol, atom = self._anchor
            try:
                molecule_gene = self.parent.find_molecule(mol)
                anchor = molecule_gene.find_atom(atom)
            except StopIteration:
                pass
            else:
                self.molecule._rotation_anchor = anchor
                return anchor
        target_gene = self.parent.find_molecule(self.target)
        anchor = None
        try:
            # basestring (instead of str) so unicode names are accepted too
            if isinstance(self.target, basestring):
                mol = target_gene.compound.mol
                anchor = target_gene.find_atom(nearest_atom(mol, center(mol)))
        except (StopIteration, AttributeError):
            pass
        if anchor is None:
            # Also reached when the target is not a string, which used to
            # leave `anchor` unbound.
            anchor = target_gene.compound.donor
        self.molecule._rotation_anchor = anchor
        return anchor
Пример #5
0
class Hbonds(ObjectiveProvider):

    """
    Objective that counts the hydrogen bonds involving the probe molecules.

    Parameters
    ----------
    probes : list of str
        Names of molecules being object of analysis
    radius : float
        Maximum distance from any point of probe that is searched
        for a possible interaction
    distance_tolerance : float, optional
        Allowed deviation from ideal distance to consider a valid H bond.
    angle_tolerance : float, optional
        Allowed deviation from ideal angle to consider a valid H bond.
    only_intermolecular : boolean, optional
        Only intermolecular interactions are considered (defaults to True)

    Returns
    -------
    int
        Number of detected Hydrogen bonds.
    """

    _validate = {
        parse.Required('probes'): [parse.Molecule_name],
        'radius': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'distance_tolerance': float,
        'angle_tolerance': float,
        'only_intermolecular': bool
        }

    def __init__(self, probes=None, radius=5.0, distance_tolerance=0.4, angle_tolerance=20.0,
                 only_intermolecular=True, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._probes = probes
        self.radius = radius
        self.distance_tolerance = distance_tolerance
        self.angle_tolerance = angle_tolerance
        # Intramodel bonds are searched only if the user did not restrict
        # the analysis to intermolecular interactions.
        self.intramodel = not only_intermolecular

    def molecules(self, ind):
        """
        Chimera molecules of every molecule gene registered in `ind`.
        """
        return [gene.compound.mol for gene in ind._molecules.values()]

    def probes(self, ind):
        """
        Chimera molecules of the configured probes, looked up in `ind`.
        """
        return [ind.find_molecule(name).compound.mol for name in self._probes]

    def evaluate(self, ind):
        """
        Find H bonds among the atoms within self.radius angstroms from the
        probes, and count only those in which at least one of the atoms
        involved belongs to a probe.
        """
        molecules = self.molecules(ind)
        probe_atoms = []
        for probe in self.probes(ind):
            probe_atoms.extend(probe.atoms)
        candidates = self._surrounding_atoms(probe_atoms, molecules)
        found = findHBonds(molecules, cacheDA=self._cache,
                           donors=candidates, acceptors=candidates,
                           distSlop=self.distance_tolerance,
                           angleSlop=self.angle_tolerance,
                           intermodel=True, intramodel=self.intramodel)
        return len(filterHBondsBySel(found, probe_atoms, 'any'))

    def display(self, bonds):
        """
        Mock method to show a graphical depiction of the found H Bonds.
        """
        return draw_interactions(bonds, name=self.name, startCol='00FFFF', endCol='00FFFF')

    ###
    def _surrounding_atoms(self, atoms, molecules):
        """
        Reset ``self.zone`` to `atoms`, replace it with the zone of atoms of
        `molecules` within ``self.radius`` of them, and return its atoms.
        """
        selection = self.zone
        selection.clear()
        selection.add(atoms)
        nearby = chimera.specifier.zone(selection, 'atom', None, self.radius, molecules)
        selection.merge(chimera.selection.REPLACE, nearby)
        return selection.atoms()
Пример #6
0
class DSX(ObjectiveProvider):
    """
    DSX class

    Parameters
    ----------
    proteins : list of str
        Names of the molecules acting as proteins
    ligands : list of str
        Names of the molecules acting as ligands
    binary : str, optional
        Path to the DSX binary. Only needed if ``drugscorex`` is not in PATH.
    potentials : str, optional
        Path to DSX potentials. Only needed if ``DSX_POTENTIALS`` env var has not
        been set by the installation process (``conda install -c insilichem drugscorex``
        normally takes care of that).
    terms : list of bool, optional
        Enable (True) or disable (False) certain terms in the score function in
        this order: distance-dependent pair potentials, torsion potentials,
        intramolecular clashes, sas potentials, hbond potentials

    sorting : int, defaults to 1
        Sorting mode. An int between 0-6, read binary help for -S::

            -S int :  Here you can specify the mode that affects how the results
                    will be sorted. The default mode is '-S 1', which sorts the
                    ligands in the same order as they are found in the lig_file.
                    The following modes are possible::

                        0: Same order as in the ligand file
                        1: Ordered by increasing total score
                        2: Ordered by increasing per-atom-score
                        3: Ordered by increasing per-contact-score
                        4: Ordered by increasing rmsd
                        5: Ordered by increasing torsion score
                        6: Ordered by increasing per-torsion-score

    cofactor_mode : int, defaults to 0
        Cofactor handling mode. An int between 0-7, read binary help for -I::

            -I int :  Here you can specify the mode that affects how cofactors,
                    waters and metals will be handled.
                    The default mode is '-I 1', which means, that all molecules
                    are treated as part of the protein. If a structure should
                    not be treated as part of the protein you have to supply a
                    separate file with separate MOLECULE entries corresponding
                    to each MOLECULE entry in the ligand_file (It is assumed
                    that the structure, e.g. a cofactor, was kept flexible in
                    docking, so that there should be a different geometry
                    corresponding to each solution. Otherwise it won't make
                    sense not to treat it as part of the protein.).
                    The following modes are possible:
                        0: cofactors, waters and metals interact with protein,
                        ligand and each other
                        1: cofactors, waters and metals are treated as part of
                        the protein
                        2: cofactors and metals are treated as part of the protein
                        (waters as in mode 0)
                        3: cofactors and waters are treated as part of the protein
                        4: cofactors are treated as part of the protein
                        5: metals and waters are treated as part of the protein
                        6: metals are treated as part of the protein
                        7: waters are treated as part of the protein
                    Please note: Only those structures can be treated
                    individually, which are supplied in separate files.
    with_covalent : bool, defaults to False
        Whether to deal with covalently bonded atoms as normal atoms (False) or not (True)
    with_metals : bool, defaults to True
        Whether to deal with metal atoms as normal atoms (False) or not (True)

    Returns
    -------
    float
        Interaction energy as reported by DSX output logs.
    """
    _validate = {
        parse.Required('proteins'): [parse.Molecule_name],
        parse.Required('ligands'): [parse.Molecule_name],
        'binary': parse.ExpandUserPathExists,
        'potentials': parse.ExpandUserPathExists,
        'terms': parse.All([parse.Coerce(bool)], parse.Length(min=5, max=5)),
        'sorting': parse.All(parse.Coerce(int), parse.Range(min=0, max=6)),
        'cofactor_mode': parse.All(parse.Coerce(int), parse.Range(min=0,
                                                                  max=7)),
        'with_covalent': parse.Coerce(bool),
        'with_metals': parse.Coerce(bool)
    }

    def __init__(self,
                 binary=None,
                 potentials=None,
                 proteins=('Protein', ),
                 ligands=('Ligand', ),
                 terms=None,
                 sorting=1,
                 cofactor_mode=0,
                 with_covalent=False,
                 with_metals=True,
                 *args,
                 **kwargs):
        """
        Store the DSX options and pick a directory for temporary files.

        When ``binary`` is None, ``drugscorex`` is looked up with
        ``find_executable``. Temporary files go to ``/dev/shm`` on POSIX
        systems that have it, otherwise to ``default_tempdir()``.

        Raises
        ------
        ValueError
            If no executable could be resolved.
        """
        ObjectiveProvider.__init__(self, **kwargs)
        if binary is None:
            binary = find_executable('drugscorex')
        self.binary = binary
        if not self.binary:
            raise ValueError(
                'Could not find `drugscorex` executable. Please install it '
                'with `conda install -c insilichem drugscorex` or manually '
                'specify the location with `binary` and `potentials` keys.')
        self.potentials = potentials
        self.protein_names = proteins
        self.ligand_names = ligands
        self.terms = terms
        self.sorting = sorting
        self.cofactor_mode = cofactor_mode
        self.with_covalent = with_covalent
        self.with_metals = with_metals

        # Working directory at construction time; evaluate() returns to it.
        self._oldworkingdir = os.getcwd()
        self._paths = {}
        has_shm = os.name == 'posix' and os.path.exists('/dev/shm')
        self.tmpdir = '/dev/shm' if has_shm else default_tempdir()

    def get_molecule_by_name(self, ind, *names):
        """
        Lazily yield ``ind.find_molecule(name)`` for each requested name,
        in the order given.
        """
        return (ind.find_molecule(name) for name in names)

    def evaluate(self, ind):
        """
        Run a subprocess calling DSX binary with provided options,
        and parse the results. Clean tmp files at exit.

        Input files are now written inside the ``try`` block, so anything
        already registered in ``self._paths`` is removed even when writing
        a later input fails. The working directory is restored even if the
        cleanup itself raises.

        Returns
        -------
        float
            The score read by ``parse_output``, or ``-100000 * self.weight``
            when the DSX process exits with a non-zero status.
        """
        self.tmpfile = os.path.join(self.tmpdir, next(tempnames()))
        proteins = list(self.get_molecule_by_name(ind, *self.protein_names))
        ligands = list(self.get_molecule_by_name(ind, *self.ligand_names))

        try:
            self.prepare_proteins(proteins)
            self.prepare_ligands(ligands)
            # Materialize the command so it can be both executed and logged.
            command = list(self.prepare_command())
            os.chdir(self.tmpdir)
            stream = subprocess.check_output(command, universal_newlines=True)
        except subprocess.CalledProcessError:
            logger.warning("Could not run DSX with command %s", command)
            return -100000 * self.weight
        else:
            return self.parse_output(stream)
        finally:
            try:
                self.clean()
            finally:
                os.chdir(self._oldworkingdir)

    def prepare_proteins(self, proteins):
        """
        Write all protein molecules into a single PDB file.

        The last item of ``proteins`` performs the write, combined with the
        remaining ones. The path is registered under ``'proteins'`` in
        ``self._paths``. The input sequence is left untouched (the previous
        implementation popped its last element).

        Raises
        ------
        IndexError
            If ``proteins`` is empty.
        """
        proteinpath = '{}_proteins.pdb'.format(self.tmpfile)
        everyone = list(proteins)
        writer, others = everyone[-1], everyone[:-1]
        writer.write(absolute=proteinpath,
                     combined_with=others,
                     filetype='pdb')
        self._paths['proteins'] = proteinpath

    def prepare_ligands(self, ligands):
        """
        Write the ligand molecules to a mol2 file, registered under
        ``'ligands'`` in ``self._paths``.

        With ``self.with_metals`` set, each ligand is split into metal and
        non-metal atoms. If any metal was found, the metal-only molecules
        are written to a second file (registered under ``'metals'``) and the
        ligand file receives the metal-free molecules instead.
        """
        ligandpath = '{}_ligands.mol2'.format(self.tmpfile)
        metalpath = '{}_metals.mol2'.format(self.tmpfile)
        to_write = [gene.compound.mol for gene in ligands]

        if self.with_metals:
            stripped, metal_only = [], []
            for mol in to_write:
                # Single pass: bucket atoms by their metal flag.
                buckets = {True: [], False: []}
                for atom in mol.atoms:
                    buckets[bool(atom.element.isMetal)].append(atom)
                stripped.append(molecule_from_atoms(mol, buckets[False]))
                if buckets[True]:
                    metal_only.append(molecule_from_atoms(mol, buckets[True]))
            if metal_only:
                writeMol2(metal_only, metalpath, temporary=True)
                self._paths['metals'] = metalpath
                to_write = stripped

        writeMol2(to_write,
                  ligandpath,
                  temporary=True,
                  multimodelHandling='combined')
        self._paths['ligands'] = ligandpath

    def prepare_command(self):
        """
        Build the DSX command line from the stored options and the file
        paths registered in ``self._paths``.

        Returns
        -------
        list of str
            Always a real list (previously ``map(str, cmd)``, which is a
            one-shot iterator on Python 3 and prints as ``<map object>``
            when logged).

        Raises
        ------
        KeyError
            If the protein or ligand path has not been registered yet.
        ValueError
            If ``self.terms`` does not hold exactly five items.
        """
        cmd = [self.binary]
        if self.with_covalent:
            cmd.append('-c')
        cmd += ['-P', self._paths['proteins'], '-L', self._paths['ligands']]
        metalpath = self._paths.get('metals') if self.with_metals else None
        if metalpath:
            cmd += ['-M', metalpath]
        if self.cofactor_mode is not None:
            cmd += ['-I', self.cofactor_mode]
        if self.sorting is not None:
            cmd += ['-S', self.sorting]
        if self.terms is not None:
            # Unpacking keeps the "exactly five terms" requirement explicit.
            t0, t1, t2, t3, t4 = [1.0 * t for t in self.terms]
            for index, weight in enumerate((t0, t1, t2, t3, t4)):
                cmd += ['-T{}'.format(index), weight]
        if self.potentials is not None:
            cmd += ['-D', self.potentials]
        return [str(part) for part in cmd]

    def parse_output(self, stream):
        """
        Read the score from the results file announced on DSX's stdout.

        Returns
        -------
        float
            The parsed score.
        """
        # 1. The results filename is the last word of the second-to-last
        #    line of stdout; it is resolved relative to self.tmpdir.
        # 2. Find the line '@RESULTS' and move four lines below it.
        # 3. Split that row on '|' and take the field at index 3.
        report_name = stream.splitlines()[-2].split()[-1]
        dsx_results = os.path.join(self.tmpdir, report_name)
        self._paths['output'] = dsx_results
        with open(dsx_results) as handle:
            lines = handle.read().splitlines()
        header = lines.index('@RESULTS')
        fields = lines[header + 4].split('|')
        return float(fields[3].strip())

    def clean(self):
        """
        Delete every file registered in ``self._paths`` and forget them.

        Files that no longer exist are skipped: this runs from the
        ``finally`` of ``evaluate``, where a registered file may never have
        been written, and raising there would hide the real outcome. Any
        other ``OSError`` still propagates.
        """
        import errno

        for path in list(self._paths.values()):
            try:
                os.remove(path)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise
        self._paths.clear()
Пример #7
0
class Rotamers(GeneProvider):

    """
    Rotamers class

    Parameters
    ----------
    residues : list of str
        Residues that should be analyzed. This has to be in the form:

            [ Protein/233, Protein/109 ]

        where the first element (before slash) is the gaudi.genes.molecule name
        and the second element (after slash) is the residue position number in that
        molecule.

        This list of str is later parsed to the proper chimera.Residue objects

    library : {'Dunbrack', 'Dynameomics'}
        The rotamer library to use.

    with_original: bool, defaults to True
        Whether to include the original set of chi angles as part of the
        rotamer library.

    Attributes
    ----------
    allele : list of float
        For i residues, it contains i floats within [0, 1), that will point
        to the selected rotamer torsions for each residue.
    """
    _validate = {
        parse.Required('residues'): [parse.Named_spec("molecule", "residue")],
        'library': parse.Any('Dunbrack', 'dunbrack', 'Dynameomics', 'dynameomics'),
        'with_original': parse.Boolean,
       }

    # Avoid unnecessary calls to the expensive retrieve_rotamers if the
    # residue type is known to not have any rotamers
    _residues_without_rotamers = set(('ALA', 'GLY'))

    def __init__(self, residues=None, library='Dunbrack', with_original=True, **kwargs):
        """
        Store the options and bind the per-gene caches.

        ``self.residues`` and ``self.rotamers`` are taken from
        ``self._cache`` under ``<name>_residues`` / ``<name>_rotamers``;
        a new ``OrderedDict`` is stored there when the key is missing.
        """
        GeneProvider.__init__(self, **kwargs)
        self._kwargs = kwargs
        self._residues = residues
        self.library = library
        self.with_original = with_original
        self.allele = []
        # set caches
        for attr in ('residues', 'rotamers'):
            key = self.name + '_' + attr
            try:
                store = self._cache[key]
            except KeyError:
                store = self._cache[key] = OrderedDict()
            setattr(self, attr, store)


    def __ready__(self):
        """
        Second stage of initialization.

        Each ``(molecule name, position)`` pair in ``self._residues`` is
        resolved through the parent individual. Every residue found is
        patched with ``patch_residue``, stored in ``self.residues`` keyed by
        ``(molecule name, residue position)``, and given a random allele
        value. A warning is logged when a position other than ``'*'``
        matches more than one residue.
        """
        for molname, pos in self._residues:
            matches = self.parent.find_molecule(molname).find_residues(pos)
            if len(matches) > 1 and pos != '*':
                logger.warning('Found more than one residue for %s/%s', molname, pos)
            for res in matches:
                self.patch_residue(res)
                self.residues[(molname, res.id.position)] = res
                self.allele.append(random.random())

    def __deepcopy__(self, memo):
        new = self.__class__(residues=self._residues, library=self.library, **self._kwargs)
        new.residues = self.residues
        new.allele = self.allele[:]
        return new

    def express(self):
        """
        Apply, for each residue, the rotamer selected by its allele value.

        Residue types listed in ``_residues_without_rotamers`` are skipped.
        A type that raises ``NoResidueRotamersError`` is logged and added to
        that set.
        """
        for ((molname, pos), residue), fraction in zip(self.residues.items(), self.allele):
            if residue.type in self._residues_without_rotamers:
                continue
            try:
                rotamers = self.retrieve_rotamers(molname, pos, residue,
                                                  library=self.library.title())
            except NoResidueRotamersError:  # ALA, GLY...
                logger.warn('%s/%s (%s) has no rotamers', molname, pos, residue.type)
                self._residues_without_rotamers.add(residue.type)
                continue
            # The allele value is a fraction that indexes the rotamer list.
            chosen = rotamers[int(fraction * len(rotamers))]
            self.update_rotamer(residue, chosen)

    def unexpress(self):
        """
        Call ``adjustAngle`` on every cached torsion of every residue with
        the negative of its current ``angle``.
        """
        torsions = (t for res in self.residues.values() for t in res._rotamer_torsions)
        for torsion in torsions:
            torsion.adjustAngle(-torsion.angle, torsion.rotanchor)

    def mate(self, mate):
        """
        Recombine the alleles of ``self`` and ``mate`` in place.

        Alleles with two or more items go through ``deap.tools.cxTwoPoint``.
        That operator cannot pick crossover points on shorter sequences, so
        single-item alleles just swap their value and empty ones are left
        untouched.
        """
        size = min(len(self.allele), len(mate.allele))
        if size == 0:
            return
        if size == 1:
            self.allele[0], mate.allele[0] = mate.allele[0], self.allele[0]
            return
        self.allele, mate.allele = deap.tools.cxTwoPoint(self.allele, mate.allele)

    def mutate(self, indpb):
        """
        Replace each allele value by a new random number in [0, 1) with
        probability ``indpb``; other values are kept.
        """
        mutated = []
        for value in self.allele:
            if random.random() < indpb:
                value = random.random()
            mutated.append(value)
        self.allele = mutated

    def retrieve_rotamers(self, molecule, position, residue, library='Dunbrack'):
        """
        Return the chi-angle sets available for ``residue``, cached per
        ``(molecule, position)`` in ``self.rotamers``.

        On a cache miss the rotamers are requested from ``library``, ranked
        by ``Midas.rmsd`` against a reference (the residue itself when
        ``self.with_original`` is set, otherwise the first rotamer), and
        reduced to their chi values. With ``with_original`` the residue's
        current chis are prepended. The temporary rotamer molecules are
        destroyed even if ranking fails.

        The ranking uses ``sorted`` and a generator instead of sorting and
        indexing the result of ``zip``, which only works on Python 2.

        Returns
        -------
        tuple
            One entry of chi values per rotamer.
        """
        key = (molecule, position)
        try:
            return self.rotamers[key]
        except KeyError:
            pass

        def rmsd_to(ref, query):
            ref_atoms = [ref.atomsMap[a.name][0] for a in query.atoms]
            return Midas.rmsd(ref_atoms, query.atoms)

        chis = getRotamerParams(residue, lib=library)[2]
        rotamers_mols = getRotamers(residue, lib=library)[1]
        try:
            reference = residue if self.with_original else rotamers_mols[0].residues[0]
            ranked = sorted(zip(rotamers_mols, [c.chis for c in chis]),
                            key=lambda rc: rmsd_to(reference, rc[0]))
            rotamers = tuple(chi_set for _, chi_set in ranked)
            if self.with_original:
                rotamers = (self.all_chis(residue),) + rotamers
        finally:
            for rot in rotamers_mols:
                rot.destroy()
        self.rotamers[key] = rotamers
        return rotamers

    @staticmethod
    def update_rotamer(residue, chis):
        for bondrot, chi in zip(residue._rotamer_torsions, chis):
            bondrot.adjustAngle(chi - bondrot.chi, bondrot.rotanchor)

    @staticmethod
    def patch_residue(residue):
        """
        Attach a ``_rotamer_torsions`` list of ``BondRot`` objects to
        ``residue``, one per chi angle (1 to 4) that can be built.

        Does nothing when the residue already carries a non-empty
        ``_rotamer_torsions``. Each ``BondRot`` gets two extra attributes:
        ``rotanchor`` (result of ``box.find_nearest`` for the anchor atom
        and the bond atoms) and ``chi`` (the dihedral of the chi atoms at
        patch time).
        """
        if getattr(residue, '_rotamer_torsions', None):
            return
        residue._rotamer_torsions = [] # cache of BondRot objects
        # Anchor on the atom named 'CA'; fall back to the first atom.
        alpha_carbon = next((a for a in residue.atoms if a.name == 'CA'), residue.atoms[0])
        for chi in range(1, 5):
            try:
                atoms = chiAtoms(residue, chi)
                # The rotatable bond joins the two middle atoms of the chi.
                bond = atoms[1].bondsMap[atoms[2]]
                br = BondRot(bond)
                br.rotanchor = box.find_nearest(alpha_carbon, bond.atoms)
                br.chi = dihedral(*[a.coord() for a in atoms])
                residue._rotamer_torsions.append(br)
            except AtomsMissingError:
                # Stop at the first chi whose atoms are missing.
                break
            except (chimera.error, ValueError) as v:
                if "cycle" in str(v) or "already used" in str(v):
                    continue  # skip bonds in cycles or already in use
                # Any other error stops the scan.
                break

    @staticmethod
    def all_chis(residue):
        chis = []
        for i in range(1, 5):
            try:
                chis.append(getattr(residue, 'chi{}'.format(i)))
            except AttributeError:
                break
        return chis
Пример #8
0
class Search(GeneProvider):
    """
    Parameters
    ----------
    target : namedtuple or Name of gaudi.genes.molecule
        Can be either:
        - The *anchor* atom of the molecule we want to move, with syntax
        ``<molecule_name>/<index>``. For example, if we want to move Ligand
        using atom with serial number = 1 as pivot, we would specify
        ``Ligand/1``. It's parsed to the actual chimera.Atom later on.
        - A name of gaudi.genes.molecule instance. In this case, the *anchor* 
        atom for the movement of the molecule will be set to its nearest 
        atom to the geometric center of the molecule.
    center : 3-item list or tuple of float, optional
        Coordinates to the center of the desired search sphere
    radius : float
        Maximum distance from center that the molecule can move
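    rotate : bool, optional
        If False, don't rotate the molecule - only translation
    precision : int, optional
        Rounds the decimal part of the 3D search matrix to get a coarser
        model of space. Ie, less points can be accessed, the search is less
        exhaustive, more variability in less runs.
    interpolation : float, optional
        Fraction in [0, 1] used by ``mutate`` when interpolating between the
        current transform and a new random one.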

    Attributes
    ----------
    allele : 3-tuple of 4-tuple of floats
        A 4x3 matrix of float, as explained in Notes.
    origin : 3-tuple of float
        The initial position of the requested target molecule. If we don't take this
        into account, we can't move around a molecule that was not originally in the
        center of the sphere.

    Notes
    -----
    **How matricial translation and rotation takes place**

    A single movement is summed up in a 4x3 matrix:

        (
        (R1, R2, R3, T1),
        (R4, R5, R6, T2),
        (R7, R8, R9, T3)
        )

    R-elements contain the rotation information, while T elements account for
    the translation movement.

    That matrix can be obtained from multiplying three different matrices with
    this expression:

        multiply_matrices(translation, rotation, to_zero)

    To understand the operation, it must be read from the right:

        1. First, translate the molecule to the origin of coordinates 0,0,0
        2. In that position, the rotation can take place.
        3. Then, translate to the final coordinates from zero. There's no need
           to get back to the original position.

    How do we get the needed matrices?

    - ``to_zero``. Record the original position (`origin`) of the molecule and
      multiply it by -1. Done with method `to_zero()`.

    - ``rotation``. Obtained directly from ``FitMap.search.random_rotation``

    - ``translation``. Check docstring of ``random_translation()`` in this module.

    """

    _validate = {
        parse.Required("target"):
        parse.Any(parse.Named_spec("molecule", "atom"), parse.Molecule_name),
        "center":
        parse.Any(parse.Coordinates, parse.Named_spec("molecule", "atom")),
        "radius":
        parse.Coerce(float),
        "rotate":
        parse.Boolean,
        "precision":
        parse.All(parse.Coerce(int), parse.Range(min=-3, max=6)),
        "interpolation":
        parse.All(parse.Coerce(float), parse.Range(min=0, max=1.0)),
    }

    def __init__(self,
                 target=None,
                 center=None,
                 radius=None,
                 rotate=True,
                 precision=0,
                 interpolation=0.5,
                 **kwargs):
        """
        Store the search settings; the allele is created in ``__ready__``.
        Remaining keyword arguments go to ``GeneProvider.__init__``.
        """
        GeneProvider.__init__(self, **kwargs)
        self.target = target
        self._center = center
        self.radius = radius
        self.rotate = rotate
        self.precision = precision
        self.interpolation = interpolation

    def __ready__(self):
        """
        Second stage of initialization.

        A target given as a plain molecule name is replaced by a
        ``parse.MoleculeAtom`` whose atom is ``nearest_atom(mol,
        center(mol))``. The allele is then set to a random transform.
        """
        target = self.target
        if isinstance(target, str):
            mol = self.parent.find_molecule(target).compound.mol
            self.target = parse.MoleculeAtom(target, nearest_atom(mol, center(mol)))
        self.allele = self.random_transform()

    @property
    def center(self):
        """
        Center of the search sphere: the configured center resolved with
        ``parse_origin``, or ``self.origin`` when none was given.
        """
        if not self._center:
            return self.origin
        return parse_origin(self._center, self.parent)

    @property
    def molecule(self):
        return self.parent.find_molecule(self.target.molecule).compound.mol

    @property
    def origin(self):
        """``self.target`` resolved with ``parse_origin`` against the parent."""
        target, parent = self.target, self.parent
        return parse_origin(target, parent)

    @property
    def to_zero(self):
        """
        Return a translation matrix that takes the molecule from its
        original position to the origin of coordinates (0,0,0).

        Needed for rotations.
        """
        x, y, z = self.origin
        return ((1.0, 0.0, 0.0, -x), (0.0, 1.0, 0.0, -y), (0.0, 0.0, 1.0, -z))

    def express(self):
        """
        Multiply the allele matrices with ``to_zero`` and set the result as
        the xform of the target molecule. With a positive ``precision`` the
        matrices are rounded to that many decimals first.
        """
        matrices = self.allele + (self.to_zero, )
        if self.precision > 0:
            matrices = numpy_around(matrices, self.precision).tolist()
        self.molecule.openState.xform = M.chimera_xform(
            M.multiply_matrices(*matrices))

    def unexpress(self):
        """
        Reset the molecule's xform to a freshly built ``X()``.
        """
        blank = X()
        self.molecule.openState.xform = blank

    def mate(self, mate):
        """
        Interpolate both transforms halfway and share the result.

        ``self`` keeps its own translation and receives the rotation part of
        the interpolation; ``mate`` receives the translation part of the
        interpolation and keeps its own rotation.
        """
        mine = M.chimera_xform(M.multiply_matrices(*self.allele))
        theirs = M.chimera_xform(M.multiply_matrices(*mate.allele))
        blended = M.xform_matrix(M.interpolate_xforms(mine, ZERO, theirs, 0.5))
        # Rotation rows with a zeroed translation column.
        rotation_only = [row[:3] + (0, ) for row in blended]
        # Identity rotation rows carrying the interpolated translation.
        translation_only = [
            unit[:3] + row[-1:] for row, unit in zip(blended, M.identity_matrix())
        ]
        new_self = (self.allele[0], rotation_only)
        new_mate = (translation_only, mate.allele[1])
        self.allele, mate.allele = new_self, new_mate

    def mutate(self, indpb):
        """
        With probability ``self.indpb``, move the current transform towards
        a new random one by ``self.interpolation`` and store the result as
        ``(translation, rotation)``.

        NOTE(review): the ``indpb`` argument is not used; the probability
        comes from ``self.indpb``. Confirm this is intended.
        """
        if not random.random() < self.indpb:
            return
        current = M.chimera_xform(M.multiply_matrices(*self.allele))
        candidate = M.chimera_xform(
            M.multiply_matrices(*self.random_transform()))
        blended = M.xform_matrix(
            M.interpolate_xforms(current, ZERO, candidate, self.interpolation))
        rotation_only = [row[:3] + (0, ) for row in blended]
        translation_only = [
            unit[:3] + row[-1:] for row, unit in zip(blended, M.identity_matrix())
        ]
        self.allele = translation_only, rotation_only

    #####
    def random_transform(self):
        """
        Return a ``(translation, rotation)`` pair: a random rotation (or
        ``IDENTITY`` when rotation is disabled) and a random translation
        drawn for ``self.center`` and ``self.radius``.
        """
        if self.rotate:
            rotation = random_rotation()
        else:
            rotation = IDENTITY
        translation = random_translation(self.center, self.radius)
        return translation, rotation
Пример #9
0
class NormalModes(GeneProvider):
    """
    NormalModes class

    Parameters
    ----------
    method : str
        Either: 
        - prody : calculate normal modes using prody algorithms
        - gaussian : read normal modes from a gaussian output file
    target : str
        Name of the Gene containing the actual molecule
    modes : list, optional, default=range(12)
        Modes to be used to move the molecule
    group_by : str or callable, optional, default=None
        group_by_*: algorithm name or callable
        coarseGrain(prm) which makes ``mol.select().setBetas(i)``,
        where ``i`` is the index Coarse Grain group,
        and ``prm`` is ``prody.AtomGroup``
    group_lambda : int, optional
        Either: number of residues per group (default=7), or
        total mass per group (default=100)
    path : str
        Gaussian or prody modes output path. Required if ``method`` is
        ``gaussian``.
    write_modes: bool, optional
        write a ``molecule_modes.nmd`` file with the ProDy modes
    n_samples : int, optional, default=10000
        number of conformations to generate
    rmsd : float, optional, default=1.0
        average RMSD that the conformations will have with respect 
        to the initial conformation

    Attributes
    ----------
    allele : slice of prody.ensemble
        Randomly picked coordinates from NORMAL_MODES_SAMPLES
    NORMAL_MODES : prody.modes
        normal modes calculated for the molecule or read
        from the gaussian frequencies output file stored
        in a prody modes class (ANM or RTB)
    NORMAL_MODES_SAMPLES : prody.ensemble
        configurations applying modes to molecule
    _original_coords : numpy.array
        Parent coordinates
    _chimera2prody : dict
        _chimera2prody[chimera_index] = prody_index
    """

    _validate = {
        parse.Required('method'): parse.In(['prody', 'gaussian']),
        'path': parse.RelPathToInputFile(),
        'write_modes': parse.Boolean,
        parse.Required('target'): parse.Molecule_name,
        'group_by': parse.In(['residues', 'mass', 'calpha', '']),
        'group_lambda': parse.All(parse.Coerce(int), parse.Range(min=1)),
        'modes': [parse.All(parse.Coerce(int), parse.Range(min=0))],
        'n_samples': parse.All(parse.Coerce(int), parse.Range(min=1)),
        'rmsd': parse.All(parse.Coerce(float), parse.Range(min=0))
    }

    def __init__(self,
                 method='prody',
                 target=None,
                 modes=None,
                 n_samples=10000,
                 rmsd=1.0,
                 group_by=None,
                 group_lambda=None,
                 path=None,
                 write_modes=False,
                 **kwargs):
        """
        Store the options and select the function that will provide the
        normal modes.

        - ``method == 'prody'`` without ``path``: modes are calculated;
          ``group_by`` / ``group_lambda`` are only kept in this case.
        - ``method == 'prody'`` with ``path``: modes are read from ``path``.
        - any other method (gaussian): modes are read from ``path``.

        Raises
        ------
        ValueError
            If the gaussian method is requested without ``path``.
        """
        # Fire up!
        GeneProvider.__init__(self, **kwargs)
        self.method = method
        self.target = target
        self.modes = range(12) if modes is None else modes
        self.max_modes = max(self.modes) + 1
        self.n_samples = n_samples
        self.rmsd = rmsd
        self.write_modes = write_modes
        self.group_by = None
        self.group_by_options = None
        self.path = None
        if method != 'prody':  # gaussian
            self.normal_modes_function = self.read_gaussian_normal_modes
            if path is None:
                raise ValueError('Path is required if method == gaussian')
            self.path = path
        elif path is not None:
            self.path = path
            self.normal_modes_function = self.read_prody_normal_modes
        else:
            self.normal_modes_function = self.calculate_prody_normal_modes
            self.group_by = group_by
            self.group_by_options = {'n': group_lambda} if group_lambda is not None else {}

        # One cache per gene name.
        if self.name not in self._cache:
            self._cache[self.name] = LRU(300)

    def __ready__(self):
        """
        Second stage of initialization.

        Unless modes are already cached, run ``normal_modes_function`` and
        cache the modes, the samples, the chimera-to-prody index map and
        the current coordinates of the molecule; optionally write the modes
        to ``<molecule name>_modes.nmd`` in the output path. Finally pick a
        random sample as the allele.
        """
        cache = self._CACHE
        if not cache.get('normal_modes'):
            (normal_modes, normal_modes_samples,
             chimera2prody, prody_molecule) = self.normal_modes_function()
            cache['normal_modes'] = normal_modes
            cache['normal_modes_samples'] = normal_modes_samples
            cache['chimera2prody'] = chimera2prody
            cache['original_coords'] = chimeracoords2numpy(self.molecule)
            if self.write_modes:
                filename = '{}_modes.nmd'.format(self.molecule.name)
                title = os.path.join(self.parent.cfg.output.path, filename)
                prody.writeNMD(title, normal_modes, prody_molecule)
        self.allele = random.choice(self.NORMAL_MODES_SAMPLES)

    def express(self):
        """
        Move every atom of the molecule to the coordinates the allele holds
        for it; the row is found by mapping the atom's serial number
        through ``_chimera2prody``.
        """
        index_of = self._chimera2prody
        coords = self.allele
        for atom in self.molecule.atoms:
            row = coords[index_of[atom.serialNumber]]
            atom.setCoord(chimera.Point(*row))

    def unexpress(self):
        """
        Put every atom back at the cached original coordinates, matching
        atoms and rows by position.
        """
        original = self._original_coords
        for position, atom in enumerate(self.molecule.atoms):
            atom.setCoord(chimera.Point(*original[position]))

    def mate(self, mate):
        """
        No recombination is performed for this gene.

        .. todo::

            Combine coords between two samples in NORMAL_MODES_SAMPLES?
            Or two samples between different NORMAL_MODES_SAMPLES?
            Or combine samples between two NORMAL_MODES_SAMPLES?

            For now: do nothing.
        """
        return None

    def mutate(self, indpb):
        """
        With probability ``self.indpb``, switch the allele to another
        randomly chosen sample.

        The chosen sample is now stored in ``self.allele``; previously it
        was only returned, so the gene never changed. It is still returned
        for callers that used the return value.

        NOTE(review): the ``indpb`` argument is not used; the probability
        comes from ``self.indpb``. Confirm this is intended.

        Returns
        -------
        The new allele, or None when no mutation took place.
        """
        if random.random() < self.indpb:
            self.allele = random.choice(self.NORMAL_MODES_SAMPLES)
            return self.allele

    #####
    @property
    def molecule(self):
        return self.parent.genes[self.target].compound.mol

    @property
    def _CACHE(self):
        return self._cache[self.name]

    @property
    def NORMAL_MODES(self):
        return self._CACHE.get('normal_modes')

    @property
    def NORMAL_MODES_SAMPLES(self):
        return self._CACHE.get('normal_modes_samples')

    @property
    def _chimera2prody(self):
        return self._CACHE.get('chimera2prody')

    @property
    def _original_coords(self):
        return self._CACHE.get('original_coords')

    def calculate_prody_normal_modes(self):
        """
        Calculate normal modes with ``prody_modes``, build the mapping
        between chimera and prody indices, and sample ``self.n_samples``
        conformations from the selected modes.

        Returns
        -------
        tuple
            ``(modes, sample coordinates, chimera2prody, prody_molecule)``.
            The prody molecule's own coordinate set is appended to the
            samples.
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(
            self.molecule)
        grouper = GROUPERS[self.group_by]
        modes = prody_modes(prody_molecule, self.max_modes, grouper,
                            **self.group_by_options)
        ensemble = prody.sampleModes(modes=modes[self.modes],
                                     atoms=prody_molecule,
                                     n_confs=self.n_samples,
                                     rmsd=self.rmsd)
        ensemble.addCoordset(prody_molecule)
        samples_coords = []
        for conformation in ensemble:
            samples_coords.append(conformation.getCoords())
        return modes, samples_coords, chimera2prody, prody_molecule

    def read_prody_normal_modes(self):
        """
        Read normal modes from the NMD file at ``self.path`` (first item
        returned by ``prody.parseNMD``), build the mapping between chimera
        and prody indices, and sample ``self.n_samples`` conformations.

        Returns
        -------
        tuple
            ``(modes, sample coordinates, chimera2prody, prody_molecule)``.
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(
            self.molecule)
        parsed = prody.parseNMD(self.path)
        modes = parsed[0]
        ensemble = prody.sampleModes(modes=modes[self.modes],
                                     atoms=prody_molecule,
                                     n_confs=self.n_samples,
                                     rmsd=self.rmsd)
        ensemble.addCoordset(prody_molecule)
        samples_coords = []
        for conformation in ensemble:
            samples_coords.append(conformation.getCoords())
        return modes, samples_coords, chimera2prody, prody_molecule

    def read_gaussian_normal_modes(self):
        """
        Read normal modes from ``self.path`` with ``gaussian_modes``, build
        the mapping between chimera and prody indices, and sample
        ``self.n_samples`` conformations from the selected modes.

        Returns
        -------
        tuple
            ``(modes, sample coordinates, chimera2prody, prody_molecule)``.
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(
            self.molecule)
        modes = gaussian_modes(self.path)
        ensemble = prody.sampleModes(modes=modes[self.modes],
                                     atoms=prody_molecule,
                                     n_confs=self.n_samples,
                                     rmsd=self.rmsd)
        ensemble.addCoordset(prody_molecule)
        samples_coords = []
        for conformation in ensemble:
            samples_coords.append(conformation.getCoords())
        return modes, samples_coords, chimera2prody, prody_molecule
Пример #10
0
class Solvation(ObjectiveProvider):
    """
    Solvation objective.

    Builds a surface (via ``grid_sas_surface``) around the atoms of the
    target molecules plus their surrounding zone, and scores how far the
    area or the enclosed volume of that surface is from ``threshold``.

    Parameters
    ----------
    targets : [str]
        Names of the molecule genes being analyzed
    threshold : float, optional, default=0
        Reference value the measured area or volume is compared against
    radius : float, optional, default=5.0
        Distance handed to the zone search around the targets' atoms.
        A falsy value (e.g. 0) skips the zone search altogether.
    method : str, optional, default=area
        Which magnitude of the surface is measured: `area` uses
        ``surface_area``, anything else uses ``enclosed_volume``.

    Returns
    -------
    float
        Absolute difference between ``threshold`` and the surface area
        (if method=area) or the enclosed volume (otherwise).
    """

    _validate = {
        parse.Required('targets'): [parse.Molecule_name],
        'threshold': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'radius': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'method': parse.In(['volume', 'area'])
    }

    def __init__(self,
                 targets=None,
                 threshold=0.0,
                 radius=5.0,
                 method='area',
                 *args,
                 **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._targets = targets
        self.threshold = threshold
        self.radius = radius
        self.method = method
        # Bind the evaluation routine once, at construction time
        self.evaluate = (self.evaluate_area if method == 'area'
                         else self.evaluate_volume)

    def targets(self, ind):
        """
        Resolve the configured target names into the molecules held by
        the corresponding genes of ``ind``.
        """
        found = []
        for name in self._targets:
            found.append(ind.find_molecule(name).compound.mol)
        return found

    def molecules(self, ind):
        """
        Tuple with the molecule of every molecule gene in ``ind``.
        """
        mols = []
        for gene in ind._molecules.values():
            mols.append(gene.compound.mol)
        return tuple(mols)

    def surface(self, ind):
        """
        Compute the surface of the atoms selected by ``zone_atoms``.
        """
        probes = self.targets(ind)
        everything = self.molecules(ind)
        selected_atoms = self.zone_atoms(probes, everything)
        return grid_sas_surface(selected_atoms)

    def evaluate_area(self, ind):
        """
        Absolute deviation of the surface area from ``self.threshold``.
        """
        area = surface_area(*self.surface(ind))
        return abs(area - self.threshold)

    def evaluate_volume(self, ind):
        """
        Absolute deviation of the enclosed volume (first item returned by
        ``enclosed_volume``) from ``self.threshold``.
        """
        volume = enclosed_volume(*self.surface(ind))[0]
        return abs(volume - self.threshold)

    def zone_atoms(self, probes, molecules):
        """
        Reset ``self.zone`` to the atoms of ``probes`` and, if a radius is
        set, replace it with the zone found around them in ``molecules``.

        Returns the atoms of the resulting selection.
        """
        zone = self.zone
        zone.clear()
        probe_atoms = []
        for probe in probes:
            for atom in probe.atoms:
                probe_atoms.append(atom)
        zone.add(probe_atoms)
        if self.radius:
            zone.merge(
                chimera.selection.REPLACE,
                chimera.specifier.zone(zone, 'atom', None, self.radius,
                                       molecules))
        return zone.atoms()
Пример #11
0
class Gold(ObjectiveProvider):
    """
    Gold class

    Parameters
    ----------
    protein : str
        The name of molecule acting as protein
    ligand : str
        The name of molecule acting as ligand
    scoring : str, optional, defaults to chemscore
        Fitness function to use. Choose between chemscore, chemplp,
        goldscore and asp.
    score_component : str, optional, defaults to 'Score'
        Scoring fields to parse out of the rescore.log file, such as
        Score, DG, S(metal), etc.
    radius : float, optional, defaults to 10.0
        Radius (in A) of binding site sphere, the origin of which is
        automatically centered at the ligand's center of mass.

    Returns
    -------
    float
        Interaction energy as reported by GOLD's chosen scoring function
    """
    _validate = {
        parse.Required('protein'): parse.Molecule_name,
        parse.Required('ligand'): parse.Molecule_name,
        'scoring': parse.In(['chemscore', 'chemplp', 'goldscore', 'asp']),
        'radius': parse.Coerce(float),
        'score_component': str,
    }

    def __init__(self,
                 protein='Protein',
                 ligand='Ligand',
                 scoring='chemscore',
                 score_component='Score',
                 radius=10,
                 *args,
                 **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.protein_names = [protein]
        self.ligand_names = [ligand]
        self.scoring = scoring
        self.score_component = score_component
        self.radius = radius
        self.executable = find_executable('gold_auto')
        if self.executable is None:
            sys.exit(
                'GOLD could not be found in $PATH. Is it (correctly) installed?'
            )
        # Remember where we were launched from: evaluate() changes the
        # working directory temporarily and must restore it afterwards
        self._oldworkingdir = os.getcwd()
        # Temporary files written for the current evaluation, by role
        self._paths = {}
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = default_tempdir()

    def get_molecule_by_name(self, ind, *names):
        """
        Get a molecule gene instance of individual by its name
        """
        for name in names:
            yield ind.find_molecule(name)

    def evaluate(self, ind):
        """
        Run a subprocess calling the GOLD executable with a freshly written
        configuration file, and parse the results out of ``rescore.log``.
        Temporary input files are removed and the working directory is
        restored at exit.

        Returns
        -------
        float
            The value parsed by ``parse_output``, or ``-100000 * self.weight``
            if the results file could not be read.
        """
        self.tmpfile = os.path.join(self.tmpdir, next(tempnames()))
        proteins = list(self.get_molecule_by_name(ind, *self.protein_names))
        ligands = list(self.get_molecule_by_name(ind, *self.ligand_names))

        protein_path = self.prepare_proteins(proteins)
        ligand_path = self.prepare_ligands(ligands)
        origin = self.origin(ligands[0])
        command = self.prepare_command(protein_path, ligand_path, origin)

        try:
            os.chdir(self.tmpdir)
            # subprocess.call() returns the exit status without raising, so
            # a failed run is detected below through a missing/unreadable
            # rescore.log (IOError) rather than through CalledProcessError.
            subprocess.call(command)
            return self.parse_output('rescore.log')
        except (subprocess.CalledProcessError, IOError):
            logger.warning("Could not run GOLD with command %s", command)
            return -100000 * self.weight
        finally:
            # Always go back to the original directory, even if removing
            # the temporary files fails
            try:
                self.clean()
            finally:
                os.chdir(self._oldworkingdir)

    def prepare_proteins(self, proteins):
        """
        Write all the protein genes, combined, into a single PDB file and
        register its path for later cleanup.

        Note that the last item is popped out of ``proteins``.
        """
        proteinpath = '{}_proteins.pdb'.format(self.tmpfile)
        last_protein = proteins.pop()
        last_protein.write(absolute=proteinpath,
                           combined_with=proteins,
                           filetype='pdb')
        self._paths['proteins'] = proteinpath
        return proteinpath

    def prepare_ligands(self, ligands):
        """
        Write the molecules of all ligand genes into a single mol2 file
        and register its path for later cleanup.
        """
        ligandpath = '{}_ligands.mol2'.format(self.tmpfile)
        ligand_mols = [lig.compound.mol for lig in ligands]

        writeMol2(ligand_mols,
                  ligandpath,
                  temporary=True,
                  multimodelHandling='combined')
        self._paths['ligands'] = ligandpath
        return ligandpath

    def origin(self, molecule):
        """
        Mass-weighted average of the atom positions of ``molecule``,
        used as the center of the binding site.
        """
        molecule = molecule.compound.mol
        coordinates = atom_positions(molecule.atoms, molecule.openState.xform)
        masses = np.fromiter((a.element.mass for a in molecule.atoms),
                             dtype='float32',
                             count=molecule.numAtoms)
        return np.average(coordinates, axis=0, weights=masses)

    def prepare_command(self, protein_path, ligand_path, origin):
        """
        Fill in the configuration template, write it next to the other
        temporary files and return the command line (as a list) that runs
        the executable on it.
        """
        replaces = dict(PROTEIN=protein_path,
                        LIGAND=ligand_path,
                        ORIGIN='{} {} {}'.format(*origin),
                        SCORING=self.scoring,
                        RADIUS=self.radius)
        inputfile = _TEMPLATE.safe_substitute(replaces)
        inputfilepath = self.tmpfile + '.conf'
        with open(inputfilepath, 'w') as f:
            f.write(inputfile)
        self._paths['conf'] = inputfilepath
        return [self.executable, inputfilepath]

    def parse_output(self, filename):
        """
        Extract the requested score component from a results file.

        The line whose first field is ``Status`` is treated as the header:
        the position of ``self.score_component`` in it selects the column
        to read (column 4 is used if no header has been seen yet). The
        value is taken from the first line whose first field is ``Ok``.

        Returns
        -------
        float or None
            The parsed value, or None if the file has no ``Ok`` line.

        Raises
        ------
        ValueError
            If the header does not list ``self.score_component``.
        """
        column = 4
        with open(filename) as f:
            for line in f:
                if not line.strip():
                    continue
                fields = line.split()
                if fields[0] == 'Status':
                    column = fields.index(self.score_component)
                elif fields[0] == 'Ok':
                    return float(fields[column])
        return None

    def clean(self):
        """
        Remove every registered temporary file and forget about them.
        """
        for p in self._paths.values():
            os.remove(p)
        self._paths.clear()
Пример #12
0
class Mutamers(GeneProvider):

    """
    Mutamers class

    Parameters
    ----------
    residues : list of str
        Residues that can mutate. This has to be in the form:

            [ Protein/233, Protein/109 ]

        where the first element (before slash) is the gaudi.genes.molecule name
        and the second element (after slash) is the residue position number in that
        molecule.

        This list of str is later parsed to the proper chimera.Residue objects

    library : {'Dunbrack', 'Dynameomics'}
        The rotamer library to use.

    mutations : list of str, required
        Aminoacids (in 3-letter codes) residues can mutate to.

    ligation : bool, optional
        If True, all residues will mutate to the same type of aminoacid.

    hydrogens : bool, optional
        If True, add hydrogens to replacing residues (buggy)

    avoid_replacement : bool, optional
        If True and the residue already has the requested type, only the
        coordinates of its atoms are updated instead of replacing the
        whole residue with the rotamer.

    Attributes
    ----------
    allele : list of 2-tuple (str, float)
        For i residues, it contains i tuples with two values each:
        residue type and a float within [0, 1), which will be used
        to pick one of the rotamers for that residue type.
    """
    _validate = {
        parse.Required('residues'): [parse.Named_spec("molecule", "residue")],
        'library': parse.Any('Dunbrack', 'dunbrack', 'Dynameomics', 'dynameomics'),
        'mutations': [parse.ResidueThreeLetterCode],
        'ligation': parse.Boolean,
        'hydrogens': parse.Boolean,
        'avoid_replacement': parse.Boolean,
        }

    def __init__(self, residues=None, library='Dunbrack', avoid_replacement=False,
                 mutations=[], ligation=False, hydrogens=False, **kwargs):
        GeneProvider.__init__(self, **kwargs)
        self._kwargs = kwargs
        self._residues = residues
        self.library = library
        # Keep a private copy so the (shared) default list or the caller's
        # list can never be altered through this instance
        self.mutations = list(mutations)
        self.ligation = ligation
        self.hydrogens = hydrogens
        self.avoid_replacement = avoid_replacement
        self.allele = []
        # set caches
        try:
            self.residues = self._cache[self.name + '_residues']
        except KeyError:
            self.residues = self._cache[self.name + '_residues'] = OrderedDict()

        try:
            self.rotamers = self._cache[self.name + '_rotamers']
        except KeyError:
            cache_size = len(residues) * (1 + 0.5 * len(mutations))
            self.rotamers = self._cache[self.name + '_rotamers'] = LRU(int(cache_size))

        # With ligation, a single random number is reused by choice() so
        # every residue picks the same entry of the mutations list
        if self.ligation:
            self.random_number = random.random()
        else:
            self.random_number = None

        # Avoid unnecessary calls to expensive get_rotamers if residue is known
        # to not have any rotamers
        self._residues_without_rotamers = ['ALA', 'GLY']

    def __deepcopy__(self, memo):
        """
        Build a new gene with the same configuration. The residues and
        rotamers caches are shared with the original; the allele is copied.
        """
        new = self.__class__(residues=self._residues, library=self.library,
                             avoid_replacement=self.avoid_replacement,
                             mutations=self.mutations[:], ligation=self.ligation,
                             hydrogens=self.hydrogens, **self._kwargs)
        new.residues = self.residues
        new.rotamers = self.rotamers
        new.allele = self.allele[:]
        new.random_number = self.random_number
        new._residues_without_rotamers = self._residues_without_rotamers
        return new

    def __ready__(self):
        """
        Second stage of initialization.

        It parses the requested residues strings to actual residues.
        """
        for molecule, resid in self._residues:
            for res in self.parent.find_molecule(molecule).find_residues(resid):
                self.residues[(molecule, resid)] = res
                self.allele.append((self.choice(self.mutations + [res.type]),
                                    random.random()))

    def express(self):
        """
        Apply the residue type and rotamer encoded in the allele to every
        registered residue.

        If the residue gets replaced, the cached reference is refreshed by
        looking the residue up again at the same position.
        """
        for (mol, pos), (restype, i) in zip(self.residues, self.allele):
            replaced = False
            residue = self.residues[(mol, pos)]
            try:
                rotamer = self.get_rotamers(mol, pos, restype)
            except NoResidueRotamersError:  # ALA, GLY...
                if residue.type != restype:
                    SwapRes.swap(residue, restype)
                    replaced = True
            else:
                # i is within [0, 1): scale it to a valid rotamer index
                rotamer_index = int(i * len(rotamer))
                if self.avoid_replacement and residue.type == restype:
                    self.update_rotamer_coords(residue, rotamer[rotamer_index])
                else:
                    replaceRotamer(residue, [rotamer[rotamer_index]])
                    replaced = True
            if replaced:
                self.residues[(mol, pos)] = \
                    next(r for r in self.parent.genes[mol].compound.mol.residues
                         if r.id.position == pos)

    def unexpress(self):
        """
        Hide the atoms of every registered residue.
        """
        for res in self.residues.values():
            for a in res.atoms:
                a.display = 0

    def mate(self, mate):
        """
        Two-point crossover with another Mutamers gene.

        With ligation, residue types are left untouched and only the
        rotamer-selecting floats are crossed over; otherwise whole allele
        entries are exchanged.
        """
        if self.ligation:
            self_residues, self_rotamers = zip(*self.allele)
            mate_residues, mate_rotamers = zip(*mate.allele)
            self_rotamers, mate_rotamers = deap.tools.cxTwoPoint(
                list(self_rotamers), list(mate_rotamers))
            # Build real lists (not lazy map objects) so the allele can
            # still be sliced, appended to and iterated more than once
            self.allele = [list(pair)
                           for pair in zip(self_residues, self_rotamers)]
            mate.allele = [list(pair)
                           for pair in zip(mate_residues, mate_rotamers)]
        else:
            self.allele, mate.allele = deap.tools.cxTwoPoint(
                self.allele, mate.allele)

    def mutate(self, indpb):
        """
        With some probability, regenerate the whole allele at random.
        """
        # NOTE(review): the probability used is self.indpb, not the indpb
        # argument -- confirm this per-gene override is intended.
        if random.random() < self.indpb:
            self.allele[:] = []
            if self.ligation:  # don't forget to get a new random!
                self.random_number = random.random()
            for res in self.residues.values():
                self.allele.append(
                    (self.choice(self.mutations + [res.type]),
                        random.random()
                     )
                )

    ###

    def choice(self, l):
        """
        Overrides ``random.choice`` with custom one so we can
        reuse a previously obtained random number. This helps dealing
        with the ``ligation`` parameter, which forces all the requested
        residues to mutate to the same type
        """
        # Compare against None explicitly: 0.0 is a legitimate random draw
        # and must still be shared by all residues
        if self.random_number is not None:
            return l[int(self.random_number * len(l))]
        return l[int(random.random() * len(l))]

    def get_rotamers(self, mol, pos, restype):
        """
        Gets the requested rotamers out of cache and if not found,
        creates the library and stores it in the cache.

        Parameters
        ----------
        mol : str
            gaudi.genes.molecule name that contains the residue
        pos :
            Residue position in `mol`
        restype :
            Get rotamers of selected position with this type of residue. It does
            not need to be the original type, so this allows mutations

        Returns
        -------
            List of rotamers returned by ``Rotamers.getRotamers``.

        Raises
        ------
        NoResidueRotamersError
            If `restype` is known (or found) to have no rotamers.
        """
        if restype in self._residues_without_rotamers:
            raise NoResidueRotamersError
        try:
            rotamers = self.rotamers[(mol, pos, restype)]
        except KeyError:
            try:
                rotamers = getRotamers(self.residues[(mol, pos)], resType=restype,
                                       lib=self.library.title())[1]
            except NoResidueRotamersError:  # ALA, GLY... has no rotamers
                # Remember it so the expensive call is skipped next time
                self._residues_without_rotamers.append(restype)
                raise
            else:
                if self.hydrogens:
                    self.add_hydrogens_to_isolated_rotamer(rotamers)
                self.rotamers[(mol, pos, restype)] = rotamers
        return rotamers

    @staticmethod
    def update_rotamer_coords(residue, rotamer):
        """
        Copy the coordinates of the atoms in the first residue of `rotamer`
        onto the atoms of `residue` that share the same name.
        """
        rotamer = rotamer.residues[0]
        for name, rotamer_atoms in rotamer.atomsMap.items():
            for res_atom, rot_atom in zip(residue.atomsMap[name], rotamer_atoms):
                res_atom.setCoord(rot_atom.coord())

    @staticmethod
    def add_hydrogens_to_isolated_rotamer(rotamers):
        """
        Add hydrogens to `rotamers`, temporarily retyping their CA atoms.
        """
        # Patch original definitions of atomtypes to account for existing bonds
        # Force trigonal planar geometry so we get a good hydrogen
        # Ideally, we'd use a tetrahedral geometry (4, 3), but with that one the
        # hydrogen we get is sometimes in direct collision with next residues' N
        patched_idatm = IdatmTypeInfo(3, 3)
        unknown_types = {}
        for rot in rotamers:
            for a in rot.atoms:
                if a.name == 'CA':
                    unknown_types[a] = patched_idatm
                    a.idatmType, a.idatmType_orig = "_CA", a.idatmType

        # Add the hydrogens
        simpleAddHydrogens(rotamers, unknownsInfo=unknown_types)
        # Undo the monkey patch
        for rot in rotamers:
            for a in rot.atoms:
                if a.name == 'CA':
                    a.idatmType = a.idatmType_orig
Пример #13
0
class Distance(ObjectiveProvider):

    """
    Distance class

    Parameters
    ----------
    threshold : float or 'covalent'
        Optimum distance to meet. If set to 'covalent', the bond length
        reported by ``chimera.Element.bondLength`` for each probe/target
        pair is used instead (requires `target` to be an atom).
    tolerance : float
        Maximum deviation from threshold that is not penalized
    target : str
        The atom to measure the distance to, expressed as
        <molecule name>/<atom serial>, or a set of coordinates.
    probes : list of str
        The atoms whose distance to `target` is being measured,
        expressed as <molecule name>/<atom serial>. If more than one
        is provided, the average of all of them is returned
    center_of_mass : bool
        If true, return the distance between `target` and the
        mass-weighted center of all the probes instead. `threshold` and
        `tolerance` are not used in that mode.

    Returns
    -------
    float
        (Mean of) absolute deviation from threshold distance, in A.
    """
    # NOTE(review): 'center_of_mass' is documented as bool but coerced to
    # float here; truthiness is all that __init__ relies on -- confirm.
    _validate = {
        parse.Required('probes'): parse.AssertList(parse.Named_spec("molecule", "atom")),
        parse.Required('target'): parse.Any(parse.Named_spec("molecule", "atom"),
                                            parse.Coordinates),
        parse.Required('threshold'): parse.Any(parse.Coerce(float), parse.In(['covalent'])),
        'tolerance': parse.Coerce(float),
        'center_of_mass': parse.Coerce(float)
    }

    def __init__(self, threshold=None, tolerance=None, target=None, probes=None,
                 center_of_mass=False, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.threshold = threshold
        self.tolerance = tolerance
        self.center_of_mass = center_of_mass
        self._probes = probes
        self._target = target
        # Bind the evaluation routine once, at construction time
        if self.center_of_mass:
            self.evaluate = self.evaluate_center_of_mass
        else:
            self.evaluate = self.evaluate_distances

    def atoms(self, ind, *targets):
        """
        Yield every atom of `ind` matched by the given
        (molecule name, atom serial) pairs.
        """
        for target in targets:
            mol, serial = target
            for atom in ind.find_molecule(mol).find_atoms(serial):
                yield atom

    def _resolve_target(self, ind):
        """
        Resolve the configured target into ``(atom, point)``.

        ``atom`` is None when the target was given as raw coordinates.
        """
        if isinstance(self._target[0], basestring):  # AtomSpec like 'Molecule/1'
            atom = ind.find_molecule(self._target.molecule
                                     ).find_atom(self._target.atom)
            return atom, atom.xformCoord()
        # coordinates
        return None, chimera.Point(*self._target)

    def evaluate_distances(self, ind):
        """
        Measure the distance from every probe atom to the target and
        return the mean of the absolute deviations from the threshold.

        Raises
        ------
        ValueError
            If `threshold` is 'covalent' but the target is a set of
            coordinates, which has no element to look a bond length up for.
        """
        target_atom, target = self._resolve_target(ind)
        covalent = self.threshold == 'covalent'
        if covalent and target_atom is None:
            raise ValueError("threshold 'covalent' requires the target "
                             "to be an atom, not coordinates")
        distances = []
        for a in self.atoms(ind, *self._probes):
            d = self._distance(a, target)
            if covalent:
                # The element lives in the atom, not in its coordinates
                threshold = chimera.Element.bondLength(a.element,
                                                       target_atom.element)
            else:
                threshold = self.threshold
            d = d - threshold
            # NOTE(review): deviations *below* the tolerance are replaced by
            # a large value (1000 * weight after the absolute value below);
            # confirm this matches the documented meaning of `tolerance`.
            if self.tolerance is not None and d < self.tolerance:
                distances.append(-1000 * self.weight)
            else:
                distances.append(d)

        return numpy.mean(numpy.absolute(distances))

    def evaluate_center_of_mass(self, ind):
        """
        Distance between the target and the mass-weighted center of
        all the probe atoms.
        """
        _, target = self._resolve_target(ind)
        probes = list(self.atoms(ind, *self._probes))
        center_of_mass = self._center(*probes)

        return target.distance(chimera.Point(*center_of_mass))

    @staticmethod
    def _distance(atom, target):
        """
        Distance between the transformed coordinates of `atom` and the
        point `target`.
        """
        return atom.xformCoord().distance(target)

    @staticmethod
    def _center(*atoms):
        """
        Average of the transformed coordinates of `atoms`, weighted by the
        mass of each atom's element.
        """
        coords, masses = [], []
        for a in atoms:
            coords.append(a.xformCoord())
            masses.append(a.element.mass)

        return numpy.average(coords, axis=0, weights=masses)
Пример #14
0
class Trajectory(GeneProvider):
    """
    Gene that applies the coordinates of a frame of a MD trajectory
    to a target Molecule.

    Parameters
    ----------
    target : str
        The Molecule that contains the topology of the trajectory.
    path : str
        Path to a MD trajectory file, as supported by mdtraj.
    max_frame : int
        Upper bound (not included) of the frame numbers that can be picked.
    stride : int, optional
        Only load one in every `stride` frames
    preload : bool, optional
        Load the full trajectory in memory to accelerate expression.
        Not recommended for large files!

    Attributes
    ----------
    allele : int
        The index of a frame in the MD trajectory.
    _traj : dict
        Alias to the frames cache
    """

    _validate = {
        parse.Required('target'):
        parse.Molecule_name,
        parse.Required('path'):
        parse.ExpandUserPathExists,
        parse.Required('max_frame'):
        parse.All(parse.Coerce(int), parse.Range(min=1)),
        'stride':
        parse.All(parse.Coerce(int), parse.Range(min=1)),
        'preload':
        bool,
    }

    def __init__(self,
                 target=None,
                 path=None,
                 max_frame=None,
                 stride=1,
                 preload=False,
                 **kwargs):
        GeneProvider.__init__(self, **kwargs)
        self.target = target
        self.path = path
        self.max_frame = max_frame
        self.stride = stride
        self.preload = preload
        # Reuse the cache entry registered under this gene's name, if any
        try:
            self._traj = self._cache[self.name]
        except KeyError:
            self._traj = self._cache[self.name] = {}

    def __ready__(self):
        """
        Second stage of initialization: pick a random frame and store
        the untransformed coordinates of the target molecule so they can
        be restored by ``unexpress``.
        """
        self.allele = self.random_frame_number()
        self._original_xyz = self.molecule.xyz(transformed=False)

    def __expression_hooks__(self):
        """
        If preloading was requested, load the whole trajectory into the
        cache (only once per path).
        """
        if self.preload and self.path not in self._traj:
            self._traj[self.path] = mdtraj.load(self.path, top=self.topology)

    @property
    def molecule(self):
        """
        The target Molecule gene
        """
        return self.parent.find_molecule(self.target)

    @property
    def topology(self):
        """
        Returns the equivalent mdtraj Topology object
        of the currently expressed Chimera molecule.

        The topology is built once and stored in the Chimera molecule
        itself (``_mdtraj_topology``) for later reuse.
        """
        mol = self.molecule.compound.mol
        try:
            return mol._mdtraj_topology
        except AttributeError:
            openmm_top = Energy.chimera_molecule_to_openmm_topology(mol)
            mdtraj_top = mdtraj.Topology.from_openmm(openmm_top)
            mol._mdtraj_topology = mdtraj_top
            return mdtraj_top

    def express(self):
        """
        Load the frame requested by the current allele and copy its
        coordinates onto the atoms of the target molecule.
        """
        traj = self.load_frame(self.allele)
        # mdtraj reports coordinates in nanometers; scale by 10
        # (presumably to the Angstroms the molecule uses -- confirm)
        coords = traj.xyz[0] * 10
        for a, xyz in zip(self.molecule.compound.mol.atoms, coords):
            a.setCoord(chimera.Point(*xyz))

    def unexpress(self):
        """
        Put the coordinates stored by ``__ready__`` back onto the atoms
        of the target molecule.
        """
        for a, xyz in zip(self.molecule.compound.mol.atoms,
                          self._original_xyz):
            a.setCoord(chimera.Point(*xyz))

    def mate(self, mate):
        """
        Simply exchange alleles. Can't try to interpolate
        an intermediate structure because the result wouldn't
        probably belong to the original trajectory!
        """
        self.allele, mate.allele = mate.allele, self.allele

    def mutate(self, indpb):
        """
        With probability `indpb`, jump to another random frame.
        """
        if random.random() < indpb:
            self.allele = self.random_frame_number()

    def random_frame_number(self):
        """
        Random frame number within [0, max_frame), in steps of `stride`.
        """
        # randrange draws from the same values without building the range
        return random.randrange(0, self.max_frame, self.stride)

    def load_frame(self, n):
        """
        Return frame number `n`, either from the preloaded trajectory
        or straight from the file.

        With preloading, the trajectory must already be in the cache
        (see ``__expression_hooks__``); a KeyError is raised otherwise.
        """
        if self.preload:
            return self._traj[self.path][n]
        return mdtraj.load_frame(self.path, n, top=self.topology)
Пример #15
0
class LigScore(ObjectiveProvider):
    """
    LigScore class

    Parameters
    ----------
    proteins : list of str
        The name of molecules that are acting as proteins
    ligands : list of str
        The name of molecules that are acting as ligands
    method : {'rank', 'pose'}, optional, defaults to pose
        Passed to the executable as a ``--<method>`` flag
    binary : str, optional
        Path to ligand_score executable. If not provided, it is
        searched for with ``find_executable``.
    library : str, optional
        Path to LigScore lib file

    Returns
    -------
    float
        Interaction energy as reported by IMP's ligand_score.
    """
    _validate = {
        parse.Required('proteins'): [parse.Molecule_name],
        parse.Required('ligands'): [parse.Molecule_name],
        'method': parse.In(['rank', 'pose']),
        'binary': parse.ExpandUserPathExists,
        'library': parse.ExpandUserPathExists,
    }

    def __init__(self,
                 proteins=('Protein', ),
                 ligands=('Ligand', ),
                 method='pose',
                 binary=None,
                 library=None,
                 *args,
                 **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.protein_names = proteins
        self.ligand_names = ligands
        self.binary = find_executable(
            'ligand_score') if binary is None else binary
        self.library = library
        self.method = method

        # Remember where we were launched from: evaluate() changes the
        # working directory temporarily and must restore it afterwards
        self._oldworkingdir = os.getcwd()
        # Temporary files written for the current evaluation, by role
        self._paths = {}
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = default_tempdir()

    def get_molecule_by_name(self, ind, *names):
        """
        Get a molecule gene instance of individual by its name
        """
        for name in names:
            yield ind.find_molecule(name)

    def evaluate(self, ind):
        """
        Run a subprocess calling LigScore binary with provided options,
        and parse the results. Temporary files are removed and the working
        directory is restored at exit.

        Returns
        -------
        float
            The value parsed by ``parse_output``, or ``-100000 * self.weight``
            if the executable exits with a non-zero status.
        """
        self.tmpfile = os.path.join(self.tmpdir, next(tempnames()))
        proteins = list(self.get_molecule_by_name(ind, *self.protein_names))
        ligands = list(self.get_molecule_by_name(ind, *self.ligand_names))

        protein_path = self.prepare_proteins(proteins)
        ligand_path = self.prepare_ligands(ligands)
        command = self.prepare_command(protein_path, ligand_path)

        try:
            os.chdir(self.tmpdir)
            stream = subprocess.check_output(command, universal_newlines=True)
        except subprocess.CalledProcessError:
            logger.warning("Could not run LigScore with command %s", command)
            return -100000 * self.weight
        else:
            return self.parse_output(stream)
        finally:
            # Always go back to the original directory, even if removing
            # the temporary files fails
            try:
                self.clean()
            finally:
                os.chdir(self._oldworkingdir)

    def prepare_proteins(self, proteins):
        """
        Write all the protein genes, combined, into a single PDB file and
        register its path for later cleanup.

        Note that the last item is popped out of ``proteins``.
        """
        proteinpath = '{}_proteins.pdb'.format(self.tmpfile)
        last_protein = proteins.pop()
        last_protein.write(absolute=proteinpath,
                           combined_with=proteins,
                           filetype='pdb')
        self._paths['proteins'] = proteinpath
        return proteinpath

    def prepare_ligands(self, ligands):
        """
        Write the molecules of all ligand genes into a single mol2 file
        and register its path for later cleanup.
        """
        ligandpath = '{}_ligands.mol2'.format(self.tmpfile)
        ligand_mols = [lig.compound.mol for lig in ligands]

        writeMol2(ligand_mols,
                  ligandpath,
                  temporary=True,
                  multimodelHandling='combined')
        self._paths['ligands'] = ligandpath
        return ligandpath

    def prepare_command(self, protein_path, ligand_path):
        """
        Build the command line, as a list of str: binary, method flag,
        ligand file, protein file and, if configured, the library file.
        """
        cmd = [self.binary, '--' + self.method, ligand_path, protein_path]
        if self.library:
            cmd.append(self.library)
        # A real list (not a lazy map object) so it can be both executed
        # and shown in log messages
        return [str(arg) for arg in cmd]

    def parse_output(self, stream):
        """ Get last word of first line (and unique) and parse it into float """
        return float(stream.splitlines()[0].split()[-1])

    def clean(self):
        """
        Remove every registered temporary file and forget about them.
        """
        for p in self._paths.values():
            os.remove(p)
        self._paths.clear()
Пример #16
0
class Contacts(ObjectiveProvider):
    """
    Score the contacts (hydrophobic patches or steric clashes) found around
    a set of probe molecules.

    Parameters
    ----------
    probes : list of str
        Names of the molecule genes whose contacts are analyzed.
    radius : float
        Maximum distance from any probe atom that is searched for
        possible interactions.
    which : {'hydrophobic', 'clashes'}
        Type of interaction to measure.
    clash_threshold : float, optional
        Maximum overlap of van der Waals spheres that is still considered
        a contact (attractive). Greater overlaps are considered clashes
        (repulsive).
    hydrophobic_threshold : float, optional
        Maximum overlap for hydrophobic patches.
    hydrophobic_elements : list of str, optional, defaults to [C, S]
        Elements allowed to take part in hydrophobic patches.
    cutoff : float, optional
        If the overlap volume is greater than this, a penalty is applied.
        Useful to filter bad solutions.
    bond_separation : int, optional
        Ignore clashes or contacts between atoms within n bonds.
    same_residue : bool, optional
        Passed to ``DetectClash.detectClash`` as ``intraRes``. Defaults
        to True.
    only_internal : bool, optional
        If True, only atoms of the probes themselves are considered (the
        search zone is not extended to their surroundings). Defaults to
        False.

    Returns
    -------
    float
        Lennard-Jones-like energy when `which`=`hydrophobic`, and
        volumetric overlap of VdW spheres (cubic angstroms) when
        `which`=`clashes`.
    """
    _validate = {
        parse.Required('probes'): [parse.Molecule_name],
        'which': parse.In(['hydrophobic', 'clashes']),
        'radius': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'cutoff': parse.Coerce(float),
        'clash_threshold': parse.Coerce(float),
        'hydrophobic_threshold': parse.Coerce(float),
        'hydrophobic_elements': [basestring],
        'bond_separation': parse.All(parse.Coerce(int), parse.Range(min=2)),
        'same_residue': parse.Coerce(bool),
        'only_internal': parse.Coerce(bool)
    }

    def __init__(self, probes=None, radius=5.0, which='hydrophobic',
                 clash_threshold=0.6, hydrophobic_threshold=-0.4, cutoff=0.0,
                 hydrophobic_elements=('C', 'S'), bond_separation=4,
                 same_residue=True, only_internal=False, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._probes = probes
        self.which = which
        self.radius = radius
        self.cutoff = cutoff
        self.clash_threshold = clash_threshold
        self.hydrophobic_threshold = hydrophobic_threshold
        self.hydrophobic_elements = set(hydrophobic_elements)
        self.bond_separation = bond_separation
        self.same_residue = same_residue
        self.only_internal = only_internal
        # ``evaluate`` and the threshold handed to DetectClash depend on the
        # kind of interaction requested
        hydrophobic_mode = which == 'hydrophobic'
        self.evaluate = (self.evaluate_hydrophobic if hydrophobic_mode
                         else self.evaluate_clashes)
        self.threshold = (hydrophobic_threshold if hydrophobic_mode
                          else clash_threshold)

    def molecules(self, ind):
        """Chimera molecules of every molecule gene in the individual."""
        found = []
        for gene in ind._molecules.values():
            found.append(gene.compound.mol)
        return found

    def probes(self, ind):
        """Chimera molecules of the genes named in ``probes``."""
        found = []
        for name in self._probes:
            found.append(ind.find_molecule(name).compound.mol)
        return found

    def evaluate_clashes(self, ind):
        """
        Sum of the absolute volumetric overlaps of all the clashes found.

        If ``cutoff`` is non-zero and the sum exceeds it, the score is
        replaced by ``-1000 * self.weight``.
        """
        _, clashing = self.find_interactions(ind)
        clashscore = 0
        for _a1, _a2, _overlap, vol_overlap in clashing:
            clashscore += abs(vol_overlap)
        if self.cutoff and clashscore > self.cutoff:
            return -1000 * self.weight
        return clashscore

    def evaluate_hydrophobic(self, ind):
        """Sum of the Lennard-Jones-like energies of all attractive contacts."""
        contacts, _ = self.find_interactions(ind)
        energy = 0
        for _a1, _a2, _overlap, lj_energy in contacts:
            energy += lj_energy
        return energy

    def find_interactions(self, ind):
        """
        Run DetectClash on the atoms of the search zone and classify the
        result with ``_analyze_interactions``.

        Returns
        -------
        positive, negative : list of list
            See ``_analyze_interactions``.
        """
        zone_atoms = self._surrounding_atoms(ind)
        detected = DetectClash.detectClash(
            zone_atoms,
            test=zone_atoms,
            intraRes=self.same_residue,
            interSubmodel=True,
            clashThreshold=self.threshold,
            assumedMaxVdw=2.1,
            hbondAllowance=0.2,
            bondSeparation=self.bond_separation)
        return self._analyze_interactions(detected)

    def _analyze_interactions(self, clashes):
        """
        Interpret contacts provided by DetectClash.

        Parameters
        ----------
        clashes : dict of dict
            Output of DetectClash. It's a dict of atoms, whose values are
            dicts. These subdictionaries contain all the contacting atoms as
            keys, and the respective overlapping length as values.

        Returns
        -------
        positive : list of list
            Each sublist depicts an interaction, with four items: the two
            involved atoms, their overlap, and their Lennard-Jones score.
        negative : list of list
            Each sublist depicts an interaction, with four items: the two
            involved atoms, their overlap, and their volumetric overlap.

        .. note ::
            Every contacting pair is inspected. Overlaps above
            ``clash_threshold`` are repulsive and measured with the
            volumetric overlap of the involved atoms' Van der Waals spheres
            (``_vdw_vol_overlap``). The remaining pairs are attractive only
            if both atoms belong to ``hydrophobic_elements``; those are
            weighted with a Lennard-Jones like function
            (``_lennard_jones``). Any other pair is ignored.
        """
        attractive, repulsive = [], []
        for atom, partners in clashes.items():
            for partner, overlap in partners.items():
                if not overlap <= self.clash_threshold:
                    # beyond the clash threshold: clash!
                    volume = self._vdw_vol_overlap(atom, partner, overlap)
                    repulsive.append([atom, partner, overlap, volume])
                    continue
                # within the threshold: may be a hydrophobic interaction
                if (atom.element.name in self.hydrophobic_elements
                        and partner.element.name in self.hydrophobic_elements):
                    energy = self._lennard_jones(atom, partner, overlap)
                    attractive.append([atom, partner, overlap, energy])
        return attractive, repulsive

    def _surrounding_atoms(self, ind):
        """
        Get atoms in the search zone, based on the probes, (possible)
        rotamer genes and the radius.

        The shared ``self.zone`` selection is cleared and refilled on every
        call.
        """
        zone = self.zone
        zone.clear()
        # All the atoms of the probe molecules
        probe_atoms = []
        for mol in self.probes(ind):
            probe_atoms.extend(mol.atoms)
        zone.add(probe_atoms)

        if self.only_internal:
            return zone.atoms()

        # Beta carbons of rotamers, to find clashes/contacts around them too
        beta_carbons = []
        for _name, gene in ind.genes.items():
            if gene.__class__.__name__ != 'Rotamers':
                continue
            for (_molname, _pos), residue in gene.residues.items():
                for atom in residue.atoms:
                    if atom.name == 'CB':
                        beta_carbons.append(atom)
        zone.add(beta_carbons)

        # Replace the selection with the zone around probes + rotamers atoms
        nearby = chimera.specifier.zone(zone, 'atom', None, self.radius,
                                        self.molecules(ind))
        zone.merge(chimera.selection.REPLACE, nearby)
        return zone.atoms()

    @staticmethod
    def _lennard_jones(a1, a2, overlap=None):
        """
        VERY rough approximation of a Lennard-Jones score (12-6).

        Parameters
        ----------
        a1, a2 : chimera.Atom
        overlap : float, optional
            Overlapping radii of involved atoms, as provided by
            DetectClash. If not given, the distance between the atoms is
            measured from their transformed coordinates.

        Notes
        -----
        The usual implementation of a LJ potential is:

            LJ = 4*epsilon*(0.25*((r0/r)**12) - 0.5*((r0/r)**6))

        Two approximations are done:
            - The atoms involved are considered equal, hence the
              distance at which the energy is minimum (r0) is just
              the sum of their radii.
            - Epsilon is always 1.
        """
        r0 = a1.radius + a2.radius
        if overlap is not None:
            distance = r0 - overlap
        else:
            distance = a1.xformCoord().distance(a2.xformCoord())
        sixth = (r0 / distance)**6
        return (sixth * sixth - 2 * sixth)

    @staticmethod
    def _vdw_vol_overlap(a1, a2, overlap=None):
        """
        Volumetric overlap of Van der Waals spheres of atoms.

        Parameters
        ----------
        a1, a2 : chimera.Atom
        overlap : float, optional
            Overlapping sphere segment of involved atoms. If not given, the
            distance between the atoms is measured from their transformed
            coordinates.

        Returns
        -------
        float or int
            Sum of the volumes of both spherical caps, or the fixed value
            1000 when the distance between the atoms is zero.

        .. note ::
            Adapted from Eran Eyal, Comput Chem 25: 712-724, 2004
        """
        PI = 3.14159265359
        radii_sum = a1.radius + a2.radius
        if overlap is not None:
            d = radii_sum - overlap
        else:
            d = a1.xformCoord().distance(a2.xformCoord())
        if d == 0:
            return 1000
        if d < radii_sum:
            # Heights of the spherical caps of each atom
            h_a = (a2.radius**2 - (d - a1.radius)**2) / (2 * d)
            h_b = (a1.radius**2 - (d - a2.radius)**2) / (2 * d)
        else:
            h_a, h_b = 0, 0

        return (PI / 3) * ((h_a**2) * (3 * a1.radius - h_a) + (h_b**2) *
                           (3 * a2.radius - h_b))
Пример #17
0
class Vina(ObjectiveProvider):
    """
    Score a receptor-ligand pair with AutoDock Vina.

    Parameters
    ----------
    receptor : str
        Key of the gene containing the molecule acting as receptor (protein)
    ligand : str
        Key of the gene containing the molecule acting as ligand
    prepare_each : bool
        Whether to prepare receptors and ligands in every evaluation or try
        to cache the results for faster performance.

    Returns
    -------
    float
        Interaction energy in kcal/mol, as reported by AutoDock Vina --score-only.
    """
    _validate = {
        parse.Required('receptor'): parse.Molecule_name,
        parse.Required('ligand'): parse.Molecule_name,
        'prepare_each': bool,
    }

    def __init__(self,
                 receptor='Protein',
                 ligand='Ligand',
                 prepare_each=False,
                 *args,
                 **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.receptor = receptor
        self.ligand = ligand
        self.prepare_each = prepare_each
        self._paths = []
        self._tmpfile = None
        # Prefer the /dev/shm location for temporary files when it exists
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = _get_default_tempdir()

    def evaluate(self, ind):
        """
        Run a subprocess calling Vina binary with provided options,
        and parse the results. Clean tmp files at exit.

        Returns ``-100000 * self.weight`` if Vina exits with an error.
        """
        receptor = ind.find_molecule(self.receptor)
        ligand = ind.find_molecule(self.ligand)

        receptor_pdbqt = self.prepare_receptor(receptor)
        ligand_pdbqt = self.prepare_ligand(ligand)
        command = [
            'vina', '--score_only', '--cpu', '1', '--receptor', receptor_pdbqt,
            '--ligand', ligand_pdbqt
        ]

        try:
            stream = check_output(command, universal_newlines=True)
        except CalledProcessError:
            logger.warning("Could not run Vina with command %s", command)
            return -100000 * self.weight
        else:
            return self.parse_output(stream)
        finally:
            self.clean()

    def _prepare(self, molecule, which='receptor'):
        """
        Get the path of an up-to-date .pdbqt file for ``molecule``.

        If the file does not exist yet, the molecule is written as PDB and
        converted with the AutoDock preparation class matching ``which``.
        Otherwise, only the coordinates of the existing file are refreshed.

        Parameters
        ----------
        molecule
            Molecule gene exposing ``write`` and ``xyz`` methods.
        which : {'receptor', 'ligand'}
            Role of the molecule.

        Returns
        -------
        str
            Path to the .pdbqt file.

        Raises
        ------
        ValueError
            If ``which`` is neither ``'receptor'`` nor ``'ligand'``.
        """
        if which == 'receptor':
            preparer = AD4ReceptorPreparation
        elif which == 'ligand':
            preparer = AD4LigandPreparation
        else:
            raise ValueError('which must be receptor or ligand')
        path = '{}_{}.pdb'.format(self.tmpfile, which)
        pathqt = path + 'qt'
        if not os.path.isfile(pathqt):
            molecule.write(absolute=path, filetype='pdb')
            self._paths.append(path)
            mol = MolKit.Read(path)[0]
            mol.buildBondsByDistance()
            # Instantiated only for its side effect; presumably this is what
            # writes ``pathqt`` -- confirm against AutoDockTools.
            preparer(mol, outputfilename=pathqt)
            self._paths.append(pathqt)
        else:
            # update coordinates
            self._update_pdbqt_coordinates(molecule.xyz(), pathqt)
        return pathqt

    def prepare_receptor(self, molecule):
        """Shortcut for ``_prepare(molecule, 'receptor')``."""
        return self._prepare(molecule, 'receptor')

    def prepare_ligand(self, molecule):
        """Shortcut for ``_prepare(molecule, 'ligand')``."""
        return self._prepare(molecule, 'ligand')

    @staticmethod
    def _update_pdbqt_coordinates(xyz, path):
        """
        Overwrite, in place, the coordinates of the atom records in ``path``.

        Parameters
        ----------
        xyz : sequence
            Coordinates, one (x, y, z) item per atom record, in the same
            order the records appear in the file.
        path : str
            File to be rewritten.
        """
        def is_atom(line):
            # An atom record starts with ATOM/HETATM and has three numeric
            # fields in columns 30-54
            if line[0:6] in ('ATOM  ', 'HETATM'):
                for n in (line[30:38], line[38:46], line[46:54]):
                    try:
                        float(n)
                    except ValueError:
                        return False
                return True
            return False

        with open(path, 'r+') as f:
            lines = []
            i = 0
            for line in f:
                if is_atom(line):
                    line = line[:30] + '{:8.3f}{:8.3f}{:8.3f}'.format(
                        *xyz[i]) + line[54:]
                    i += 1
                lines.append(line)
            f.seek(0)
            f.write(''.join(lines))
            f.truncate()

    def parse_output(self, stream):
        """
        Return the first number after ``Affinity:`` in the Vina output, or
        ``-1000 * self.weight`` if no such line is present.
        """
        for line in stream.splitlines():
            if line[:9] == "Affinity:":
                return float(line.split()[1])
        return -1000 * self.weight

    def clean(self):
        """
        Remove the registered temporary files, but only when ``prepare_each``
        is set; otherwise they are kept to be reused in later evaluations.
        """
        if not self.prepare_each:
            return
        for p in self._paths:
            os.remove(p)
        self._paths = []

    @property
    def tmpfile(self):
        """
        Base path for temporary files. A new random one is generated on
        every access if ``prepare_each`` is set; otherwise, the first one
        is reused.
        """
        if self.prepare_each or self._tmpfile is None:
            self._tmpfile = os.path.join(self.tmpdir,
                                         next(_get_candidate_names()))
        return self._tmpfile
Пример #18
0
class NWChem(ObjectiveProvider):
    """
    Score molecules with a NWChem calculation.

    Parameters
    ----------
    targets : list of str
        Molecule name(s) to be processed with NWChem. Small ones!
    template : str, optional
        NWChem input template (or path to a file with such contents) containing
        a $MOLECULE placeholder to be replaced by the currently expressed
        molecule(s) requested in ``targets``, and optionally, a $TITLE
        placeholder to be replaced by the job name. If not provided, it will
        default to the ``TEMPLATE`` example (single-point dft energy).
    parser : str, optional
        Path to a Python script containing a top-level function called
        `parse_output` which will parse the NWChem output and return a
        float. This replaces the default parser, which looks for the last
        'Total <whatever> energy' value.
    title : str, optional
        Value that replaces the $TITLE placeholder. Defaults to
        ``environment.cfg.output.name``.
    executable : str, optional
        Path to the NWChem binary. If not provided, ``nwchem`` is searched
        with ``find_executable``.
    basis_library : str, optional
        Directory exported as the ``NWCHEM_BASIS_LIBRARY`` environment
        variable (a trailing slash is added if missing).
    processors : int, optional=None
        Number of physical processors to use with openmpi

    Returns
    -------
    float
        Any numeric value as reported by the `parser` routines. By default,
        last 'Total <whatever> energy' value.
    """
    _validate = {
        parse.Required('targets'): [parse.Molecule_name],
        'template': basestring,
        'parser': parse.ExpandUserPathExists,
        'processors': int,
        'title': str,
        'basis_library': parse.ExpandUserPathExists,
    }

    def __init__(self,
                 template=None,
                 targets=('Ligand', ),
                 parser=None,
                 title=None,
                 executable=None,
                 basis_library=None,
                 processors=None,
                 *args,
                 **kwargs):
        # Enforce a minimum precision of 6 for this objective
        if kwargs.get('precision', 6) < 6:
            kwargs['precision'] = 6
        ObjectiveProvider.__init__(self, **kwargs)
        self.targets = targets
        self.executable = find_executable(
            'nwchem') if executable is None else executable
        if self.executable is None:
            sys.exit(
                'NWChem could not be found in $PATH. Is it (correctly) installed?'
            )
        self._nprocessors = processors
        # mpirun is only looked up when a number of processors was requested
        self._mpirun = find_executable('mpirun') if processors else None
        self._title = title if title is not None else self.environment.cfg.output.name
        # ``template`` can be absent, a path to a file, or the contents
        if template is None:
            self.template = TEMPLATE
        elif os.path.isfile(template):
            with open(template) as f:
                self.template = f.read()
        else:
            self.template = template
        self.template = self.template.replace('$TITLE', self._title)
        if parser is not None:
            # A user-provided parser overrides the default ``parse_output``
            self.parse_output = imp.load_source('_nwchem_parser',
                                                parser).parse_output
        if basis_library is not None and os.path.isdir(basis_library):
            if basis_library[-1] != '/':
                basis_library += '/'
            os.environ['NWCHEM_BASIS_LIBRARY'] = basis_library
        self._oldworkingdir = os.getcwd()
        self._paths = {}
        # Prefer the /dev/shm location for temporary files when it exists
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = default_tempdir()

    def get_molecule_by_name(self, ind, *names):
        """
        Get a molecule gene instance of individual by its name
        """
        for name in names:
            yield ind.find_molecule(name)

    def evaluate(self, ind):
        """
        Run a subprocess calling NWChem binary with provided options,
        and parse the results. Clean tmp files at exit.

        The working directory is switched to ``self.tmpdir`` during the
        calculation and always restored afterwards. If the process cannot
        be launched, ``-100000 * self.weight`` is returned.
        """
        self._tmpfile = os.path.join(self.tmpdir, next(tempnames()))
        os.chdir(self.tmpdir)
        try:
            molecules = list(self.get_molecule_by_name(ind, *self.targets))
            nwfile = self.prepare_nwfile(*molecules)
            command = []
            if self._mpirun:
                command.extend([self._mpirun, '-n', str(self._nprocessors)])
            command.extend([self.executable, nwfile])
            try:
                p = subprocess.Popen(command,
                                     universal_newlines=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
                stdout, stderr = p.communicate()
                if stderr:
                    logger.warning(stderr)
            # Popen signals launch failures (e.g. missing binary) with OSError
            except (OSError, subprocess.CalledProcessError):
                logger.warning("Could not run NWChem with command %s", command)
                return -100000 * self.weight
            return self.parse_output(stdout)
        finally:
            # Runs even if preparing the input failed, so the original
            # working directory is never lost
            self.clean()
            os.chdir(self._oldworkingdir)

    def prepare_nwfile(self, *molecules):
        """
        Write the NWChem input file for ``molecules`` and return its path.

        The file contents are the template with $MOLECULE replaced by the
        coordinates of the molecules.
        """
        xyz = self.get_xyz(*molecules)
        contents = self.template.replace('$MOLECULE', xyz)
        with open(self._tmpfile + '.nw', 'w') as f:
            f.write(contents)
        return self._tmpfile + '.nw'

    def get_xyz(self, *molecules):
        """
        Newline-joined 'element x y z' lines of all atoms in ``molecules``.
        """
        xyz = []
        for m in molecules:
            xyz.extend(self._xyzlines(m))
        return '\n'.join(xyz)

    def _xyzlines(self, molecule):
        """
        List of 'element x y z' lines, one per atom of ``molecule``, using
        the transformed coordinates of each atom.
        """
        lines = []
        for a in molecule.compound.mol.atoms:
            lines.append('{} {} {} {}'.format(a.element.name,
                                              *a.xformCoord().data()))
        return lines

    def parse_output(self, stream):
        """
        Default parser: value of the last 'Total <whatever> energy' line in
        ``stream``, or ``-100000 * self.weight`` if there is none.
        """
        result = -100000 * self.weight
        for line in stream.splitlines():
            matches = re.search(r'Total \w+ energy = *([\d\.-]+)', line)
            if matches:
                result = float(matches.group(1))
        return result

    def clean(self):
        """
        Remove the NWChem input file of the last evaluation, if it exists.
        """
        nwfile = self._tmpfile + '.nw'
        # The file may be missing if the evaluation failed before writing it
        if os.path.exists(nwfile):
            os.remove(nwfile)
Пример #19
0
class Vina(ObjectiveProvider):
    """
    Score a receptor-ligand pair with AutoDock Vina.

    Parameters
    ----------
    receptor : str
        Key of the gene containing the molecule acting as receptor (protein)
    ligand : str
        Key of the gene containing the molecule acting as ligand

    Returns
    -------
    float
        Interaction energy in kcal/mol, as reported by AutoDock Vina --score-only.
    """
    _validate = {
        parse.Required('receptor'): parse.Molecule_name,
        parse.Required('ligand'): parse.Molecule_name,
    }

    def __init__(self, receptor='Protein', ligand='Ligand', *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.receptor = receptor
        self.ligand = ligand
        self._paths = []
        # Prefer the /dev/shm location for temporary files when it exists
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = _get_default_tempdir()

    def evaluate(self, ind):
        """
        Run a subprocess calling Vina binary with provided options,
        and parse the results. Clean tmp files at exit.

        Returns ``-100000 * self.weight`` if Vina exits with an error.
        """
        self.tmpfile = os.path.join(self.tmpdir, next(_get_candidate_names()))
        receptor = ind.find_molecule(self.receptor)
        ligand = ind.find_molecule(self.ligand)

        # Preparation happens inside the try block so the temporary files
        # written so far are removed even if it fails midway
        try:
            receptor_pdbqt = self.prepare_receptor(receptor)
            ligand_pdbqt = self.prepare_ligand(ligand)
            command = [
                'vina', '--score_only', '--cpu', '1', '--receptor',
                receptor_pdbqt, '--ligand', ligand_pdbqt
            ]
            stream = check_output(command, universal_newlines=True)
        except CalledProcessError:
            logger.warning("Could not run Vina with command %s", command)
            return -100000 * self.weight
        else:
            return self.parse_output(stream)
        finally:
            self.clean()

    def _prepare_pdbqt(self, molecule, which, preparer):
        """
        Write ``molecule`` as PDB, convert it with ``preparer`` and return
        the path of the resulting .pdbqt file. Both paths are registered in
        ``self._paths`` for later removal.
        """
        path = '{}_{}.pdb'.format(self.tmpfile, which)
        pathqt = path + 'qt'
        molecule.write(absolute=path, filetype='pdb')
        self._paths.append(path)
        mol = MolKit.Read(path)[0]
        mol.buildBondsByDistance()
        # Instantiated only for its side effect; presumably this is what
        # writes ``pathqt`` -- confirm against AutoDockTools.
        preparer(mol, outputfilename=pathqt)
        self._paths.append(pathqt)
        return pathqt

    def prepare_receptor(self, molecule):
        """Prepare ``molecule`` as receptor and return the .pdbqt path."""
        return self._prepare_pdbqt(molecule, 'receptor',
                                   AD4ReceptorPreparation)

    def prepare_ligand(self, molecule):
        """Prepare ``molecule`` as ligand and return the .pdbqt path."""
        return self._prepare_pdbqt(molecule, 'ligand', AD4LigandPreparation)

    def parse_output(self, stream):
        """
        Return the first number after ``Affinity:`` in the Vina output, or
        ``-1000 * self.weight`` if no such line is present.
        """
        for line in stream.splitlines():
            if line[:9] == "Affinity:":
                return float(line.split()[1])
        return -1000 * self.weight

    def clean(self):
        """
        Remove all the registered temporary files.
        """
        # Reset the registry first, so it is not left stale if a removal fails
        paths, self._paths = self._paths, []
        for p in paths:
            os.remove(p)
Пример #20
0
class Vina(ObjectiveProvider):

    """
    Score a receptor-ligand pair with AutoDock Vina.

    Parameters
    ----------
    receptor : str
        Key of the gene containing the molecule acting as receptor (protein)
    ligand : str
        Key of the gene containing the molecule acting as ligand
    prepare_each : bool
        Whether to prepare receptors and ligands in every evaluation or try
        to cache the results for faster performance.

    Returns
    -------
    float
        Interaction energy in kcal/mol, as reported by AutoDock Vina --score-only.

    Notes
    -----
    - AutoDock scripts ``prepare_ligand4.py`` and ``prepare_receptor4.py`` are
    used to prepare the corresponding .pdbqt files that will be used as input for
    AutoDock Vina scorer.
    - No repairs nor cleanups will be performed on ligand/receptor molecules, so
    the user has to take into account that provided .mol2 or .pdb files have
    correct atom types and correct structure (including Hydrogen atoms that will
    be taken into account in the docking evaluation). Otherwise, AutoDock
    errors/warnings could appear (e.g. ``ValueError: Could not find atomic number
    for Lp Lp``)
    - Gasteiger charges will be added during the preparation of the .pdbqt files.
    - All torsions of the ligand will be marked as ``inactive`` for AutoDock,
    because torsion changes are part of GaudiMM genes.
    """
    _validate = {
        parse.Required('receptor'): parse.Molecule_name,
        parse.Required('ligand'): parse.Molecule_name,
        'prepare_each': bool,
        }

    def __init__(self, receptor='Protein', ligand='Ligand', prepare_each=False,
                 *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.receptor = receptor
        self.ligand = ligand
        self.prepare_each = prepare_each
        self._paths = []
        self._tmpfile = None
        # Prefer the /dev/shm location for temporary files when it exists
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = _get_default_tempdir()

    def evaluate(self, ind):
        """
        Run a subprocess calling Vina binary with provided options,
        and parse the results. Clean tmp files at exit.

        Returns ``-100000 * self.weight`` if Vina exits with an error.
        """
        receptor = ind.find_molecule(self.receptor)
        ligand = ind.find_molecule(self.ligand)

        receptor_pdbqt = self.prepare_receptor(receptor)
        ligand_pdbqt = self.prepare_ligand(ligand)
        command = ['vina', '--score_only', '--cpu', '1',
                   '--receptor', receptor_pdbqt, '--ligand', ligand_pdbqt]

        try:
            stream = check_output(command, universal_newlines=True)
        except CalledProcessError:
            logger.warning("Could not run Vina with command %s", command)
            return -100000 * self.weight
        else:
            return self.parse_output(stream)
        finally:
            self.clean()

    def _prepare(self, molecule, which='receptor'):
        """
        Get the path of an up-to-date .pdbqt file for ``molecule``.

        If the file does not exist yet, the molecule is written as PDB and
        converted with the AutoDock preparation class matching ``which``,
        with empty ``repairs`` and ``cleanup`` options (and
        ``inactivate_all_torsions`` for ligands). Otherwise, only the
        coordinates of the existing file are refreshed.

        Parameters
        ----------
        molecule
            Molecule gene exposing ``write`` and ``xyz`` methods.
        which : {'receptor', 'ligand'}
            Role of the molecule.

        Returns
        -------
        str
            Path to the .pdbqt file.

        Raises
        ------
        ValueError
            If ``which`` is neither ``'receptor'`` nor ``'ligand'``.
        """
        if which == 'receptor':
            preparer = AD4ReceptorPreparation
            kwargs = {"repairs": '', "cleanup": ''}
        elif which == 'ligand':
            preparer = AD4LigandPreparation
            kwargs = {"repairs": '', "cleanup": '', "inactivate_all_torsions": True}
        else:
            raise ValueError('which must be receptor or ligand')
        path = '{}_{}.pdb'.format(self.tmpfile, which)
        pathqt = path + 'qt'
        if not os.path.isfile(pathqt):
            molecule.write(absolute=path, filetype='pdb')
            self._paths.append(path)
            mol = MolKit.Read(path)[0]
            mol.buildBondsByDistance()
            # Instantiated only for its side effect; presumably this is what
            # writes ``pathqt`` -- confirm against AutoDockTools.
            preparer(mol, outputfilename=pathqt, **kwargs)
            self._paths.append(pathqt)
        else:
            # update coordinates
            self._update_pdbqt_coordinates(molecule.xyz(), pathqt)
        return pathqt

    def prepare_receptor(self, molecule):
        """Shortcut for ``_prepare(molecule, 'receptor')``."""
        return self._prepare(molecule, 'receptor')

    def prepare_ligand(self, molecule):
        """Shortcut for ``_prepare(molecule, 'ligand')``."""
        return self._prepare(molecule, 'ligand')

    @staticmethod
    def _update_pdbqt_coordinates(xyz, path):
        """
        Overwrite, in place, the coordinates of the atom records in ``path``.

        Parameters
        ----------
        xyz : sequence
            Coordinates, one (x, y, z) item per atom record, in the same
            order the records appear in the file.
        path : str
            File to be rewritten.
        """
        def is_atom(line):
            # An atom record starts with ATOM/HETATM and has three numeric
            # fields in columns 30-54
            if line[0:6] in ('ATOM  ', 'HETATM'):
                for n in (line[30:38], line[38:46], line[46:54]):
                    try:
                        float(n)
                    except ValueError:
                        return False
                return True
            return False

        with open(path, 'r+') as f:
            lines = []
            i = 0
            for line in f:
                if is_atom(line):
                    line = line[:30] + '{:8.3f}{:8.3f}{:8.3f}'.format(*xyz[i]) + line[54:]
                    i += 1
                lines.append(line)
            f.seek(0)
            f.write(''.join(lines))
            f.truncate()

    def parse_output(self, stream):
        """
        Return the first number after ``Affinity:`` in the Vina output, or
        ``-1000 * self.weight`` if no such line is present.
        """
        for line in stream.splitlines():
            if line[:9] == "Affinity:":
                return float(line.split()[1])
        return -1000 * self.weight

    def clean(self):
        """
        Remove the registered temporary files, but only when ``prepare_each``
        is set; otherwise they are kept to be reused in later evaluations.
        """
        if not self.prepare_each:
            return
        for p in self._paths:
            os.remove(p)
        self._paths = []

    @property
    def tmpfile(self):
        """
        Base path for temporary files. A new random one is generated on
        every access if ``prepare_each`` is set; otherwise, the first one
        is reused.
        """
        if self.prepare_each or self._tmpfile is None:
            self._tmpfile = os.path.join(self.tmpdir, next(_get_candidate_names()))
        return self._tmpfile
Пример #21
0
class GeneProvider(object):
    """
    Base class that every `genes` plugin MUST inherit.

    The methods listed here are compulsory for all subclasses, since the
    individual will be using them anyway. If it's not relevant in your plugin,
    just define them with a single `pass` statement. Also, don't forget to call
    `GeneProvider.__init__` in your overriden `__init__` function; it sets
    the ``parent``, ``name``, ``cxeta``, ``mteta``, ``indpb`` and ``allele``
    attributes.

    ---
    From [M.A. itself](http://martyalchin.com/2008/jan/10/simple-plugin-framework/):
    Now that we have a mount point, we can start stacking plugins onto it.
    As mentioned above, individual plugins will subclass the mount point.
    Because that also means inheriting the metaclass, the act of subclassing
    alone will suffice as plugin registration. Of course, the goal is to have
    plugins actually do something, so there would be more to it than just
    defining a base class, but the point is that the entire contents of the
    class declaration can be specific to the plugin being written. The plugin
    framework itself has absolutely no expectation for how you build the class,
    allowing maximum flexibility. Duck typing at its finest.
    """

    # This sole line is the magic behind the plugin system!
    __metaclass__ = plugin.PluginMount
    # Class-level cache, emptied by ``clear_cache``
    _cache = {}
    # Plugin-specific validation rules; merged on top of ``_schema``
    _validate = {}
    # Validation rules common to all genes
    _schema = {
        parse.Required('parent'): Individual,
        'name': str,
        'module': parse.Importable,
        'cx_eta': parse.Coerce(float),
        'mut_eta': parse.Coerce(float),
        'mut_indpb': parse.Coerce(float)
    }

    def __init__(self,
                 parent=None,
                 name=None,
                 cx_eta=5.0,
                 mut_eta=5.0,
                 mut_indpb=0.75,
                 **kwargs):
        self.parent = parent
        # Unnamed genes get a random, unique name
        self.name = name if name is not None else str(uuid4())
        self.cxeta = cx_eta
        self.mteta = mut_eta
        self.indpb = mut_indpb
        self.allele = None

    def __ready__(self):
        pass

    def __expression_hooks__(self):
        pass

    @abc.abstractmethod
    def express(self):
        """
        Compile the gene to an evaluable object.
        """

    @abc.abstractmethod
    def unexpress(self):
        """
        Revert expression.
        """

    @abc.abstractmethod
    def mutate(self):
        """
        Perform a mutation on the gene.
        """

    @abc.abstractmethod
    def mate(self, gene):
        """
        Perform a crossover with another gene of the same kind.
        """

    @classmethod
    def validate(cls, data, schema=None):
        """
        Validate ``data`` against the schema of this class.

        Parameters
        ----------
        data : dict
            Values to validate.
        schema : dict, optional
            Base schema. Defaults to ``cls._schema``. It is not modified.

        Returns
        -------
        Whatever ``parse.validate`` returns for the base schema updated
        with ``cls._validate``.
        """
        # Always work on a copy: neither the class schema nor a
        # caller-provided one should be altered by the update below
        schema = dict(cls._schema if schema is None else schema)
        schema.update(cls._validate)
        return parse.validate(schema, data)

    @classmethod
    def with_validation(cls, **kwargs):
        """
        Build an instance out of the result of validating ``kwargs``.
        """
        return cls(**cls.validate(kwargs))

    def write(self, path, name, *args, **kwargs):
        """
        Write results of expression to a file representation.

        The pretty-printed allele is saved to ``<path>/<name>_<gene name>.txt``
        and the full path of that file is returned.
        """
        fullname = os.path.join(path, '{}_{}.txt'.format(name, self.name))
        with open(fullname, 'w') as f:
            f.write(pp.pformat(self.allele))
        return fullname

    @classmethod
    def clear_cache(cls):
        """
        Empty the class-level ``_cache`` dict.
        """
        cls._cache.clear()
Пример #22
0
class ObjectiveProvider(object):
    """
    Base class that every `objectives` plugin MUST inherit.

    Mount point for plugins implementing new objectives to be evaluated by DEAP.
    The objective resides within the Fitness attribute of the individual.
    Do whatever you want, but use an evaluate() function to return the results.
    Apart from that, there are no requirements.

    The base class includes some useful attributes, so don't forget to call
    `ObjectiveProvider.__init__` in your overridden `__init__`. For example,
    `self.zone` is a `Chimera.selection.ItemizedSelection` object which is shared among
    all objectives. Use that to get atoms in the surrounding of the target gene, and
    remember to `self.zone.clear()` it before use.

    ---
    From [M.A. himself](http://martyalchin.com/2008/jan/10/simple-plugin-framework/):
    Now that we have a mount point, we can start stacking plugins onto it.
    As mentioned above, individual plugins will subclass the mount point.
    Because that also means inheriting the metaclass, the act of subclassing
    alone will suffice as plugin registration. Of course, the goal is to have
    plugins actually do something, so there would be more to it than just
    defining a base class, but the point is that the entire contents of the
    class declaration can be specific to the plugin being written. The plugin
    framework itself has absolutely no expectation for how you build the class,
    allowing maximum flexibility. Duck typing at its finest.
    """

    __metaclass__ = plugin.PluginMount
    # Class-level dict; emptied in place by clear_cache().
    _cache = {}
    # Extra schema entries that validate() layers on top of the base schema.
    _validate = {}
    # Base schema used by validate() when the caller supplies none.
    _schema = {
        parse.Required('environment'): Environment,
        'module': parse.Importable,
        'name': str,
        'weight': parse.Coerce(float),
        'zone': chimera.selection.ItemizedSelection,
        'precision': parse.All(parse.Coerce(int), parse.Range(min=0, max=9))
    }

    def __init__(self,
                 environment=None,
                 name=None,
                 weight=None,
                 zone=None,
                 precision=3,
                 **kwargs):
        """
        Store the settings shared by every objective on the instance.

        Parameters
        ----------
        environment : object, optional
            Stored as ``self.environment``. The class schema expects an
            `Environment` here.
        name : str, optional
            Identifier of the objective. When omitted, a random UUID4
            string is generated.
        weight : float, optional
            Stored as ``self.weight``.
        zone : chimera.selection.ItemizedSelection, optional
            Selection object to use. A new, empty one is created when
            omitted.
        precision : int, optional
            Stored as ``self.precision``.
        **kwargs
            Accepted and ignored by this base initializer.
        """
        self.environment = environment
        self.name = name if name is not None else str(uuid4())
        self.weight = weight
        if zone is None:
            zone = chimera.selection.ItemizedSelection()
        self.zone = zone
        self.precision = precision

    def __ready__(self):
        """
        Do nothing; the base implementation is an empty placeholder.
        """
        pass

    @abc.abstractmethod
    def evaluate(self, individual):
        """
        Return the score of the individual under the current conditions.
        """

    @classmethod
    def clear_cache(cls):
        """
        Empty the class-level ``_cache`` dict in place.
        """
        cls._cache.clear()

    @classmethod
    def validate(cls, data, schema=None):
        """
        Validate `data` against the schema of this class.

        The effective schema is a base schema (``cls._schema`` unless
        `schema` is given) with the entries of ``cls._validate`` layered
        on top.

        Parameters
        ----------
        data : dict
            Values to validate; `with_validation` passes its keyword
            arguments here.
        schema : dict, optional
            Base schema to use instead of ``cls._schema``. It is copied
            before merging, so the caller's object is left untouched.

        Returns
        -------
        The result of ``parse.validate`` for the merged schema and `data`.
        """
        # Always merge into a fresh copy: updating a caller-supplied schema
        # in place would leak this class' `_validate` entries into it.
        merged = dict(cls._schema if schema is None else schema)
        merged.update(cls._validate)
        return parse.validate(merged, data)

    @classmethod
    def with_validation(cls, **kwargs):
        """
        Build an instance from keyword arguments after validating them.

        Returns
        -------
        A new instance of `cls` constructed from the validated arguments.
        """
        # Instantiate through `cls(...)`: calling `cls.__init__` directly has
        # no instance to bind to, so it raises TypeError and returns nothing.
        return cls(**cls.validate(kwargs))