Пример #1
0
def _fillMolecule(name, resname, chain, resid, insertion, coords, segid,
                  element, occupancy, beta, charge, record):
    """Build a ``Molecule`` from parallel per-atom field sequences.

    A new ``Molecule`` is created and sized with ``empty(len(name))``; each
    field is then converted to a NumPy array using the dtype registered for
    that field in ``mol._dtypes``. ``coords`` is first stacked with
    ``np.vstack`` and promoted to at least three dimensions.

    ``charge`` and ``record`` are accepted for interface compatibility but
    are currently not written to the molecule.

    Returns
    -------
    mol : Molecule
        The populated molecule.
    """
    atom_count = len(name)
    mol = Molecule()
    mol.empty(atom_count)

    # Assignment order mirrors the field order of the signature.
    columns = (
        ('name', name),
        ('resname', resname),
        ('chain', chain),
        ('resid', resid),
        ('insertion', insertion),
        ('coords', coords),
        ('segid', segid),
        ('element', element),
        ('occupancy', occupancy),
        ('beta', beta),
    )
    for field, values in columns:
        if field == 'coords':
            # Coordinates arrive as a sequence of rows; stack them and make
            # sure a trailing third axis exists.
            values = np.atleast_3d(np.vstack(values))
        setattr(mol, field, np.array(values, dtype=mol._dtypes[field]))

    # charge and record are intentionally left at their empty() defaults.
    return mol
Пример #2
0
def _applyProteinCaps(mol, caps):
    """Prepare the protein termini of ``mol`` for capping, in place.

    For every segment in ``caps`` and for each requested cap, one terminal
    atom (an N-terminal hydrogen for index 0, a C-terminal oxygen for
    index 1) is renamed into the first atom of the cap residue and moved to
    the segment boundary; if no such atom exists a new atom is created and
    inserted instead. The remaining terminal hydrogens/oxygens listed in
    ``terminalatoms`` are then removed. Finally, hydrogens of the first and
    last resid of every protein segment are removed, whether capped or not.

    Parameters
    ----------
    mol : Molecule
        The molecule to modify. It is changed in place.
    caps : dict
        Maps a segid to a sequence of caps; index 0 is the N-terminal cap
        and index 1 the C-terminal cap. Supported cap names are ``'ACE'``
        and ``'NME'``. ``None`` or ``'none'`` skips that terminal silently;
        an empty string skips it with a warning.

    Raises
    ------
    RuntimeError
        If a segment does not exist, contains no protein atoms, or a cap
        name is not supported.
    """

    # AMBER capping
    # =============
    # This is the (horrible) way of adding caps in tleap:
    # For now, this is hardwired for ACE and NME
    # 1. Change one of the hydrogens of the N terminal (H[T]?[123]) to the ACE C atom, giving it a new resid
    # 1a. If no hydrogen present, create the ACE C atom.
    # 2. Change one of the oxygens of the C terminal ({O,OT1,OXT}) to the NME N atom, giving it a new resid
    # 2a. If no oxygen present, create the NME N atom.
    # 3. Reorder to put the new atoms first and last
    # 4. Remove the lingering hydrogens of the N terminal and oxygens of the C terminal.

    # Define the atoms to be replaced (0 and 1 corresponds to N- and C-terminal caps)
    terminalatoms = {
        'ACE': ['H1', 'H2', 'H3', 'HT1', 'HT2', 'HT3'],
        'NME': ['OXT', 'OT1', 'O']
    }  # XPLOR names for H[123] and OXT are HT[123]
    # and OT1, respectively.
    capresname = ['ACE', 'NME']
    capatomtype = ['C', 'N']

    # For each caps definition
    for seg in caps:
        prot = mol.atomselect(
            'protein'
        )  # Can't move this out since we remove atoms in this loop
        # Test segment
        insegment = mol.segid == seg
        if not np.any(insegment):
            raise RuntimeError(
                'There is no segment {} in the molecule.'.format(seg))
        if not np.any(prot & insegment):
            raise RuntimeError(
                'Segment {} is not protein. Capping for non-protein segments is not supported.'
                .format(seg))

        # Terminal resids as seen by the first processed cap, i.e. before any
        # cap atom has been given a new resid. Stays None if every cap is skipped.
        orig_terminalresids = None
        # For each cap
        for i, cap in enumerate(caps[seg]):
            if cap is None or (isinstance(cap, str) and cap == 'none'):
                continue
            # Get info on segment and its terminals
            segidm = mol.segid == seg  # Mask for segid
            segididx = np.where(segidm)[0]
            resids = mol.resid[segididx]
            terminalids = [segididx[0], segididx[-1]]
            terminalresids = [resids[0], resids[-1]]
            residm = mol.resid == terminalresids[i]  # Mask for resid

            if orig_terminalresids is None:
                orig_terminalresids = terminalresids

            if cap == '':  # In case there is no cap defined
                logger.warning(
                    'No cap provided for resid {} on segment {}. Did not apply it.'
                    .format(terminalresids[i], seg))
                continue
            elif cap not in capresname:  # If it is defined, test if supported
                raise RuntimeError(
                    'In segment {}, the {} cap is not supported. Try using {} instead.'
                    .format(seg, cap, capresname))

            # Test if cap is already applied
            testcap = np.where(segidm & residm & (mol.resname == cap))[0]
            if len(testcap) != 0:
                logger.warning(
                    'Cap {} already exists on segment {}. Did not re-apply it.'
                    .format(cap, seg))
                continue

            # Test if the atom to change exists
            termatomsids = np.zeros(residm.shape, dtype=bool)
            for atm in terminalatoms[cap]:
                termatomsids |= mol.name == atm
            termatomsids = np.where(termatomsids & segidm & residm)[0]

            if len(termatomsids) == 0:
                # Create new atom, placed by extending the CA -> terminal
                # backbone atom vector by a third of its length.
                termcaid = np.where(segidm & residm & (mol.name == 'CA'))[0]
                termcenterid = np.where(segidm & residm
                                        & (mol.name == capatomtype[1 - i]))[0]
                atom = Molecule()
                atom.empty(1)
                atom.record = np.array(['ATOM'],
                                       dtype=Molecule._dtypes['record'])
                atom.name = np.array([capatomtype[i]],
                                     dtype=Molecule._dtypes['name'])
                atom.resid = np.array([terminalresids[i] - 1 + 2 * i],
                                      dtype=Molecule._dtypes['resid'])
                atom.resname = np.array([cap],
                                        dtype=Molecule._dtypes['resname'])
                atom.segid = np.array([seg], dtype=Molecule._dtypes['segid'])
                atom.element = np.array([capatomtype[i]],
                                        dtype=Molecule._dtypes['element'])
                atom.chain = np.array(
                    [np.unique(mol.chain[segidm])],
                    dtype=Molecule._dtypes['chain']
                )  # TODO: Assumption of single chain in a segment might be wrong
                atom.coords = mol.coords[termcenterid] + 0.33 * np.subtract(
                    mol.coords[termcenterid], mol.coords[termcaid])
                mol.insert(atom, terminalids[i])

            else:
                # Select atom to change, do changes to cap, and change resid
                newatom = np.max(termatomsids)
                mol.set('resname', cap, sel=newatom)
                mol.set('name', capatomtype[i], sel=newatom)
                mol.set('element', capatomtype[i], sel=newatom)
                mol.set('resid', terminalresids[i] - 1 + 2 * i,
                        sel=newatom)  # if i=0 => resid-1; i=1 => resid+1

                # Reorder: swap the converted atom with the terminal position
                neworder = np.arange(mol.numAtoms)
                neworder[newatom] = terminalids[i]
                neworder[terminalids[i]] = newatom
                _reorderMol(mol, neworder)

        # For each cap
        for i, cap in enumerate(caps[seg]):
            # Skip every entry the loop above skipped without applying a cap
            # (None, 'none' and the empty string). Unsupported names have
            # already raised, so only keys of terminalatoms get past here;
            # previously '' reached the lookup below and raised KeyError.
            if cap not in capresname:
                continue
            # Remove lingering hydrogens or oxygens in terminals
            mol.remove('segid {} and resid "{}" and name {}'.format(
                seg, orig_terminalresids[i], ' '.join(terminalatoms[cap])),
                       _logger=False)

    # Remove terminal hydrogens regardless of caps or no caps. tleap confuses itself when it makes residues into terminals.
    # For example HID has an atom H which in NHID should become H[123] and crashes tleap.
    for seg in np.unique(mol.get('segid', 'protein')):
        segidm = mol.segid == seg  # Mask for segid
        segididx = np.where(segidm)[0]
        resids = mol.resid[segididx]
        mol.remove('(resid "{}" "{}") and segid {} and hydrogen'.format(
            resids[0], resids[-1], seg),
                   _logger=False)
Пример #3
0
def _applyProteinCaps(mol, caps):
    """Prepare the protein termini of ``mol`` for capping, in place.

    For every segment in ``caps`` and for each requested cap, one terminal
    atom (an N-terminal hydrogen for index 0, a C-terminal oxygen for
    index 1) is renamed into the first atom of the cap residue and moved to
    the segment boundary; if no such atom exists a new atom is created and
    inserted instead. The remaining terminal hydrogens/oxygens listed in
    ``terminalatoms`` are then removed. Finally, hydrogens of the first and
    last resid of every protein segment are removed, whether capped or not.

    Parameters
    ----------
    mol : Molecule
        The molecule to modify. It is changed in place.
    caps : dict
        Maps a segid to a sequence of caps; index 0 is the N-terminal cap
        and index 1 the C-terminal cap. Supported cap names are ``"ACE"``
        and ``"NME"``. ``None`` or ``"none"`` skips that terminal silently;
        an empty string skips it with a warning.

    Raises
    ------
    RuntimeError
        If a segment does not exist, contains no protein atoms, or a cap
        name is not supported.
    """

    # AMBER capping
    # =============
    # This is the (horrible) way of adding caps in tleap:
    # For now, this is hardwired for ACE and NME
    # 1. Change one of the hydrogens of the N terminal (H[T]?[123]) to the ACE C atom, giving it a new resid
    # 1a. If no hydrogen present, create the ACE C atom.
    # 2. Change one of the oxygens of the C terminal ({O,OT1,OXT}) to the NME N atom, giving it a new resid
    # 2a. If no oxygen present, create the NME N atom.
    # 3. Reorder to put the new atoms first and last
    # 4. Remove the lingering hydrogens of the N terminal and oxygens of the C terminal.

    # Define the atoms to be replaced (0 and 1 corresponds to N- and C-terminal caps)
    terminalatoms = {
        "ACE": ["H1", "H2", "H3", "HT1", "HT2", "HT3"],
        "NME": ["OXT", "OT1", "O"],
    }  # XPLOR names for H[123] and OXT are HT[123]
    # and OT1, respectively.
    capresname = ["ACE", "NME"]
    capatomtype = ["C", "N"]

    # For each caps definition
    for seg in caps:
        prot = mol.atomselect(
            "protein"
        )  # Can't move this out since we remove atoms in this loop
        # Test segment
        insegment = mol.segid == seg
        if not np.any(insegment):
            raise RuntimeError(f"There is no segment {seg} in the molecule.")
        if not np.any(prot & insegment):
            raise RuntimeError(
                f"Segment {seg} is not protein. Capping for non-protein segments is not supported."
            )

        # Terminal resids as seen by the first processed cap, i.e. before any
        # cap atom has been given a new resid. Stays None if every cap is skipped.
        orig_terminalresids = None
        # For each cap
        for i, cap in enumerate(caps[seg]):
            if cap is None or (isinstance(cap, str) and cap == "none"):
                continue
            # Get info on segment and its terminals
            segidm = mol.segid == seg  # Mask for segid
            segididx = np.where(segidm)[0]
            resids = mol.resid[segididx]
            terminalids = [segididx[0], segididx[-1]]
            terminalresids = [resids[0], resids[-1]]
            residm = mol.resid == terminalresids[i]  # Mask for resid

            if orig_terminalresids is None:
                orig_terminalresids = terminalresids

            if cap == "":  # In case there is no cap defined
                logger.warning(
                    f"No cap provided for resid {terminalresids[i]} on segment {seg}. Did not apply it."
                )
                continue
            elif cap not in capresname:  # If it is defined, test if supported
                raise RuntimeError(
                    f"In segment {seg}, the {cap} cap is not supported. Try using {capresname} instead."
                )

            # Test if cap is already applied
            testcap = np.where(segidm & residm & (mol.resname == cap))[0]
            if len(testcap) != 0:
                logger.warning(
                    f"Cap {cap} already exists on segment {seg}. Did not re-apply it."
                )
                continue

            # Test if the atom to change exists
            termatomsids = np.zeros(residm.shape, dtype=bool)
            for atm in terminalatoms[cap]:
                termatomsids |= mol.name == atm
            termatomsids = np.where(termatomsids & segidm & residm)[0]

            if len(termatomsids) == 0:
                # Create new atom, placed by extending the CA -> terminal
                # backbone atom vector by a third of its length.
                termcaid = np.where(segidm & residm & (mol.name == "CA"))[0]
                termcenterid = np.where(segidm & residm
                                        & (mol.name == capatomtype[1 - i]))[0]
                atom = Molecule()
                atom.empty(1)
                atom.record = np.array(["ATOM"],
                                       dtype=Molecule._dtypes["record"])
                atom.name = np.array([capatomtype[i]],
                                     dtype=Molecule._dtypes["name"])
                atom.resid = np.array([terminalresids[i] - 1 + 2 * i],
                                      dtype=Molecule._dtypes["resid"])
                atom.resname = np.array([cap],
                                        dtype=Molecule._dtypes["resname"])
                atom.segid = np.array([seg], dtype=Molecule._dtypes["segid"])
                atom.element = np.array([capatomtype[i]],
                                        dtype=Molecule._dtypes["element"])
                atom.chain = np.array(
                    [np.unique(mol.chain[segidm])],
                    dtype=Molecule._dtypes["chain"]
                )  # TODO: Assumption of single chain in a segment might be wrong
                atom.coords = mol.coords[termcenterid] + 0.33 * np.subtract(
                    mol.coords[termcenterid], mol.coords[termcaid])
                mol.insert(atom, terminalids[i])
            else:
                # Select atom to change, do changes to cap, and change resid
                newatom = np.max(termatomsids)
                mol.set("resname", cap, sel=newatom)
                mol.set("name", capatomtype[i], sel=newatom)
                mol.set("element", capatomtype[i], sel=newatom)
                mol.set("resid", terminalresids[i] - 1 + 2 * i,
                        sel=newatom)  # if i=0 => resid-1; i=1 => resid+1

                # Reorder: swap the converted atom with the terminal position
                neworder = np.arange(mol.numAtoms)
                neworder[newatom] = terminalids[i]
                neworder[terminalids[i]] = newatom
                _reorderMol(mol, neworder)

        # For each cap
        for i, cap in enumerate(caps[seg]):
            # Skip every entry the loop above skipped without applying a cap
            # (None, "none" and the empty string). Unsupported names have
            # already raised, so only keys of terminalatoms get past here;
            # previously "" reached the lookup below and raised KeyError.
            if cap not in capresname:
                continue
            # Remove lingering hydrogens or oxygens in terminals
            mol.remove(
                'segid {} and resid "{}" and name {}'.format(
                    seg, orig_terminalresids[i], " ".join(terminalatoms[cap])),
                _logger=False,
            )

    # Remove terminal hydrogens regardless of caps or no caps. tleap confuses itself when it makes residues into terminals.
    # For example HID has an atom H which in NHID should become H[123] and crashes tleap.
    for seg in np.unique(mol.get("segid", "protein")):
        segidm = mol.segid == seg  # Mask for segid
        segididx = np.where(segidm)[0]
        resids = mol.resid[segididx]
        mol.remove(
            f'(resid "{resids[0]}" "{resids[-1]}") and segid {seg} and hydrogen',
            _logger=False,
        )