示例#1
0
def main():
    """Apply one ftresults transformation to a ligand and write it as a PDB file.

    Command-line arguments (parsed with argparse):

    * ``ligand``: ligand PDB file used for docking.
    * ``ft_file``: ftresults file to draw transformations from.
    * ``rot_file``: rotation file used during the PIPER run that made ``ft_file``.
    * ``-n/--ftfile-line``: 0-indexed coordinate set to write (default 0).
    * ``-l/--limit``: limit for the number of ft lines read.
    * ``--ft_limit``: same meaning as ``--limit``; used only when ``--limit``
      is not given.

    The transformed ligand is written to the current working directory as
    ``<ligand base name>.<ftfile-line>.pdb``.
    """
    import os
    from argparse import ArgumentParser

    parser = ArgumentParser()

    parser.add_argument("--ftfile-line",
                        "-n",
                        type=int,
                        default=0,
                        help="0 indexed")
    parser.add_argument("--limit",
                        "-l",
                        type=int,
                        default=None,
                        help="limit for number of ft lines read")
    parser.add_argument("ligand", help="ligand used for docking")
    parser.add_argument("ft_file",
                        help="ftresults file to draw transformations from.")
    parser.add_argument(
        "rot_file",
        help="Rotation file used during PIPER run to make ft_file.")
    parser.add_argument("--ft_limit",
                        type=int,
                        default=None,
                        help="Specify how many ftfile lines to read")

    args = parser.parse_args()

    # --ft_limit used to be accepted and then silently ignored; honor it as a
    # fallback so that either spelling actually limits the number of lines.
    limit = args.limit if args.limit is not None else args.ft_limit

    rotations = read_rotations(args.rot_file)

    # read ft_files
    ftresults = read_ftresults(args.ft_file, limit)

    # get center information for original ligands
    lig_orig = parsePDB(args.ligand)
    lig_center = np.mean(lig_orig.getCoords(), axis=0)

    # Strip the directory and the real extension instead of blindly cutting
    # the last four characters, which mangled names without a ".pdb" suffix.
    lig_base = os.path.splitext(os.path.basename(args.ligand))[0]
    out_name = '%s.%s.pdb' % (lig_base, args.ftfile_line)

    # apply ft lines to the aligned ligands
    coords_apply = apply_ftresults_atom_group(lig_orig,
                                              ftresults,
                                              rotations,
                                              center=lig_center)

    # create new atom group for translated ligand
    lig_new = AtomGroup(out_name)

    # only choose one coordinate set (0 indexed)
    coords_apply.setACSIndex(args.ftfile_line)
    lig_new.setCoords(coords_apply.getCoords())
    lig_new.setNames(lig_orig.getNames())
    lig_new.setResnames(lig_orig.getResnames())
    lig_new.setResnums(lig_orig.getResnums())

    # write new ligand file
    writePDB(out_name, lig_new)
示例#2
0
 def _get_box_from_ag_coe(self, ag: prody.AtomGroup):
     """Return ``(origin, size)`` for a box centered on the extent midpoint of ``ag``.

     The center is the midpoint between the axis-0 minimum and maximum of
     ``ag.getCoords()``. The size comes from
     ``self._get_box_size_from_ag_and_center`` and is scaled by ``self.cell``
     to place the origin half a box away from the center.

     Returns the origin cast to float and the size cast to int plus one.
     """
     xyz = ag.getCoords()
     upper = xyz.max(0)
     lower = xyz.min(0)
     center = (upper + lower) / 2
     box_size = self._get_box_size_from_ag_and_center(ag, center)
     half_extent = box_size * self.cell / 2
     box_origin = center - half_extent
     return box_origin.astype(float), box_size.astype(int) + 1
def get_alpha_indices(protein: pd.AtomGroup) -> List[int]:
    """
    Collect the positions of the alpha carbons of a pd AtomGroup object.

    A position is the running count of the atom in ``protein.iterAtoms()``;
    an atom counts as an alpha carbon when its name is exactly "CA".
    """
    ca_positions = []
    for position, atom in enumerate(protein.iterAtoms()):
        if atom.getName() == "CA":
            ca_positions.append(position)
    return ca_positions
示例#4
0
    def from_prody_atomgroup(
        cls,
        name: ProteinKey,
        protein: pd.AtomGroup,
        split_type: SplitType = SplitType.KMER,
        split_size: int = 16,
        selection: str = "calpha",
        upsample_rate: int = 50,
        moment_types: List[MomentType] = (
            MomentType.O_3,
            MomentType.O_4,
            MomentType.O_5,
            MomentType.F,
        ),
    ):
        """
        Build a MomentInvariants instance from a ProDy AtomGroup object.

        The atoms are first restricted to ``"protein"`` and then to the
        `selection` string (default = alpha carbons). The instance is created
        from the selected coordinates, the residue groups returned by
        ``group_indices`` and the selected atom indices and sequence, after
        which ``_split(split_type)`` is run on it.
        `moment_types` determines which moments are calculated.

        Example
        --------
        >>> invariants = MomentInvariants.from_prody_atomgroup(atom_group, split_type=SplitType.RADIUS, moment_types=[MomentType.O_3, MomentType.F, MomentType.phi_7, MomentType.phi_12])
        """
        selected = protein.select("protein").select(selection)
        coords = selected.getCoords()
        per_residue = group_indices(selected.getResindices())
        atom_indices = selected.getIndices()
        sequence = selected.getSequence()
        instance = cls(
            name,
            len(per_residue),
            coords,
            per_residue,
            atom_indices,
            sequence=sequence,
            split_type=split_type,
            split_size=split_size,
            upsample_rate=upsample_rate,
            moment_types=moment_types,
        )
        instance._split(split_type)
        return instance
示例#5
0
def get_beta_indices(protein: pd.AtomGroup) -> List[int]:
    """
    Get indices of beta carbons of pd AtomGroup object
    (If beta carbon doesn't exist, alpha carbon index is returned)

    One index is produced per group returned by
    ``group_indices(protein.getResindices())``. Atoms are consumed in order,
    ``len(group)`` atoms per group.
    NOTE(review): this assumes each group is a contiguous run of atoms in
    atom order; confirm against ``group_indices``.

    Raises
    ------
    AssertionError
        If a group contains neither a "CB" nor a "CA" atom. This is raised
        explicitly so the check also holds when Python runs with ``-O``,
        where a bare ``assert`` is removed and ``None`` would be appended.
    """
    residue_splits = group_indices(protein.getResindices())
    position = 0
    indices = []
    for split in residue_splits:
        ca = None
        cb = None
        for _ in split:
            # Fetch the atom once instead of re-indexing for every check.
            atom = protein[position]
            atom_name = atom.getName()
            if atom_name == "CB":
                cb = atom.getIndex()
            elif atom_name == "CA":
                ca = atom.getIndex()
            position += 1
        if cb is not None:
            indices.append(cb)
        elif ca is not None:
            indices.append(ca)
        else:
            raise AssertionError(
                "residue group ending before atom position %d has neither "
                "a CB nor a CA atom" % position)
    return indices
示例#6
0
 def _get_com(self, ag: prody.AtomGroup):
     """Return the unweighted mean of ``ag.getCoords()`` taken along axis 0."""
     xyz = ag.getCoords()
     return xyz.mean(axis=0)
示例#7
0
 def _get_coe(self, ag: prody.AtomGroup):
     """Return the midpoint between the axis-0 maximum and minimum of ``ag.getCoords()``."""
     xyz = ag.getCoords()
     upper = xyz.max(0)
     lower = xyz.min(0)
     return (upper + lower) / 2
示例#8
0
 def _get_box_from_ag_com(self, ag: prody.AtomGroup):
     """Return ``(origin, size)`` for a box centered on the mean coordinate of ``ag``.

     The center is the axis-0 mean of ``ag.getCoords()``. The size comes from
     ``self._get_box_size_from_ag_and_center`` and is scaled by ``self.cell``
     to place the origin half a box away from the center.

     Returns the origin cast to float and the size cast to int plus one.
     """
     xyz = ag.getCoords()
     center = xyz.mean(axis=0)
     box_size = self._get_box_size_from_ag_and_center(ag, center)
     half_extent = box_size * self.cell / 2
     box_origin = center - half_extent
     return box_origin.astype(float), box_size.astype(int) + 1
示例#9
0
def DefineNewAtom(atom_name, element, coordinates, resname, resnum, chain_id):
    """
    Build and return a fresh AtomGroup instance holding a single atom.

    The name, element and coordinates are each wrapped in a one-item list.
    ``resname``, ``resnum`` and ``chain_id`` are handed to their setters
    exactly as received. Altloc, icode and segname are set to empty strings,
    beta and serial to 0 and occupancy to 1. Anisotropic factors are not set.
    """
    single_atom = AtomGroup()
    # The setters run in this fixed order, each with the value paired to it.
    assignments = (
        (single_atom.setNames, [atom_name]),
        (single_atom.setElements, [element]),
        (single_atom.setCoords, [coordinates]),
        (single_atom.setResnames, resname),
        (single_atom.setResnums, resnum),
        (single_atom.setChids, chain_id),
        (single_atom.setAltlocs, ['']),
        (single_atom.setBetas, [0]),
        (single_atom.setIcodes, ['']),
        (single_atom.setOccupancies, [1]),
        (single_atom.setSegnames, ['']),
        (single_atom.setSerials, [0]),
    )
    for apply_setter, value in assignments:
        apply_setter(value)
    return single_atom
示例#10
0
def AddAtoms(initial_residue, atoms2add, atomnames_of_2_letters, residue_data, zmatrix, verbose=True):
    """
    This function returns a mutated residue with the atoms added.
    This function gets a residue and a list of atomnames to add to the mutation.

    Each new atom starts with placeholder coordinates. Its real coordinates
    are then obtained, one atom at a time, from
    ``GenerateCoordinatesFromZmatrix`` applied to the residue built so far.

    :rtype : object
    :param initial_residue: atoms of the residue being mutated; the new atoms
        are appended to it with ``+``.
    :param atoms2add: sized collection of names of the atoms to create.
    :param atomnames_of_2_letters: container of two-character prefixes; a name
        whose first two characters are in it gets those two characters as its
        element, any other name gets its first character.
    :param residue_data: mapping providing 'fin_resname', 'resnum' and 'chain'
        for the new atoms.
    :param zmatrix: passed unchanged to ``GenerateCoordinatesFromZmatrix``.
    :param verbose: when true, progress messages are printed.
    :return: ``initial_residue`` combined with the newly placed atoms.
    """
    # print(...) with one argument behaves the same on Python 2 and 3, whereas
    # the bare print statement is a syntax error on Python 3.
    if verbose:
        print('Adding new atoms:')
    new_atoms_elements = []
    for atom in atoms2add:
        if verbose:
            print(" # {}".format(atom))
        if atom[:2] in atomnames_of_2_letters:
            new_atoms_elements.append(atom[:2])
        else:
            new_atoms_elements.append(atom[0])

    new_atoms = AtomGroup('New atoms')
    number_of_new_atoms = len(atoms2add)
    new_atoms.setNames(list(atoms2add))
    new_atoms.setElements(new_atoms_elements)
    new_atoms.setResnames([residue_data['fin_resname']] * number_of_new_atoms)
    new_atoms.setResnums([residue_data['resnum']] * number_of_new_atoms)
    new_atoms.setChids([residue_data['chain']] * number_of_new_atoms)
    new_atoms.setAltlocs([''] * number_of_new_atoms)
    new_atoms.setBetas([0] * number_of_new_atoms)
    new_atoms.setIcodes([''] * number_of_new_atoms)
    new_atoms.setOccupancies([1] * number_of_new_atoms)
    new_atoms.setSegnames([''] * number_of_new_atoms)
    new_atoms.setSerials([0] * number_of_new_atoms)
    # Placeholder coordinates; every atom is repositioned in the loop below.
    temporary_coords = np.ones([number_of_new_atoms, 3])
    new_atoms.setCoords(temporary_coords)
    incomplete_residue = initial_residue + new_atoms
    for atom in new_atoms.iterAtoms():
        atom_new_coordinates = GenerateCoordinatesFromZmatrix(incomplete_residue, [atom.getName()],
                                                              zmatrix)
        atom.setCoords(atom_new_coordinates)
        # Rebuild the combined residue so the next atom is placed against the
        # coordinates that were just assigned.
        incomplete_residue = initial_residue + new_atoms
    if verbose:
        print('Done')
    # incomplete_residue was rebuilt after the last coordinate update, so it
    # already is initial_residue + new_atoms; no need to combine them again.
    return incomplete_residue
示例#11
0
def AddHfromPRO(initial_residue, zmatrix, mutation):
    """
    Return ``initial_residue`` combined with one new hydrogen atom named 'H'.

    The hydrogen takes its residue name from ``zmatrix.Name`` and its residue
    number and chain from ``mutation['resnum']`` and ``mutation['chain']``.
    Its coordinates come from
    ``ModifyCoordinatesPRO(initial_residue, zmatrix, 'H', 'CD')``.
    """
    hydrogen = AtomGroup('Hydrogen')
    # Each setter receives a one-item list holding the value paired with it.
    single_atom_fields = (
        ('setNames', 'H'),
        ('setElements', 'H'),
        ('setResnames', zmatrix.Name),
        ('setResnums', mutation['resnum']),
        ('setChids', mutation['chain']),
        ('setAltlocs', ''),
        ('setBetas', 0),
        ('setIcodes', ''),
        ('setOccupancies', 1),
        ('setSegnames', ''),
        ('setSerials', 0),
    )
    for setter_name, value in single_atom_fields:
        getattr(hydrogen, setter_name)([value])
    hydrogen_coords = ModifyCoordinatesPRO(initial_residue, zmatrix, 'H', 'CD')
    hydrogen.setCoords(hydrogen_coords)
    return initial_residue + hydrogen
示例#12
0
def get_alpha_indices(protein: pd.AtomGroup) -> List[int]:
    """
    Collect the indices of the alpha carbons of a pd AtomGroup object.

    Every atom from ``protein.iterAtoms()`` whose name is exactly "CA"
    contributes the value of its ``getIndex()``.
    """
    alpha_indices = []
    for atom in protein.iterAtoms():
        if atom.getName() != "CA":
            continue
        alpha_indices.append(atom.getIndex())
    return alpha_indices
示例#13
0
# Per data type: (label passed to distanceDistribution, absoluteValues,
# writeAllOutput). The keys are also the complete set of accepted data types.
_VISUALIZEMAP_DISTRIBUTION_SETTINGS = {
    "ndcc": ("nDCC", False, True),
    "dcc": ("DCC", False, True),
    "absndcc": ("Abs(nDCC)", True, False),
    "lmi": ("LMI", True, True),
    "nlmi": ("nLMI", True, True),
    "coeviz": ("CoeViz", True, True),
    "evcouplings": ("EVcoupling Score", False, True),
    "generic": ("Correlation", False, True),
    "eg": ("Force Constants", False, True),
}


def _visualizemap_is_sparse_file(inp_file):
    """Return True when the first line of ``inp_file`` has exactly three columns."""
    # Only the first line is needed, so do not read the whole file; the
    # context manager closes the handle even if reading fails.
    with open(inp_file, 'r') as data_file:
        first_line = data_file.readline()
    if not first_line:
        raise ValueError("Input file '%s' is empty." % inp_file)
    return len(first_line.split()) == 3


def _visualizemap_load_matrix(data_type, inp_file, selectedAtoms):
    """Read ``inp_file`` into a matrix with the parser matching ``data_type``."""
    if data_type == "coeviz":
        return np.loadtxt(inp_file, dtype=float)
    if data_type == "evcouplings":
        return parseEVcouplingsScores(inp_file, selectedAtoms, False)
    if data_type == "eg":
        # The data type is elasticity graph
        return parseElasticityGraph(inp_file, selectedAtoms,
                                    writeAllOutput=False)

    # Remaining types come either in the three-column sparse format or as a
    # full matrix; the first line tells which.
    if _visualizemap_is_sparse_file(inp_file):
        matrix = parseSparseCorrData(inp_file, selectedAtoms,
                                     Ctype=True,
                                     symmetric=True,
                                     writeAllOutput=False)
    elif data_type in ("lmi", "nlmi"):
        matrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)
    else:
        matrix = np.loadtxt(inp_file, dtype=float)

    if data_type == "absndcc":
        matrix = np.absolute(matrix)
    return matrix


def _visualizemap_color_limits(data_type, ccMatrix):
    """Return ``(min, max)`` color bar limits; exit if a normalized map is not normalized."""
    if data_type in ("absndcc", "coeviz"):
        return 0.0, 1.0

    if data_type in ("ndcc", "nlmi"):
        lower = 0.0
        if data_type == "ndcc" and np.min(ccMatrix) < 0.0:
            # Assume that it is an nDCC file
            lower = -1.0
        # Ideally, it is supposed to be 1 but 1.00001 is used to avoid
        # rounding problems
        if np.max(ccMatrix) > 1.00001:
            if data_type == "ndcc":
                print("This correlation map is not normalized!")
            else:
                print("This LMI map is not normalized!")
            # TODO: At this point, one can ask the user if s/he wants to normalize it!
            sys.exit(-1)
        return lower, 1.0

    # dcc, lmi, evcouplings, generic and eg use the range found in the data.
    return np.min(ccMatrix), np.max(ccMatrix)


def visualizemapApp(inp_file, out_file, sel_type, atom_group: prody.AtomGroup,
                    vmin_fltr, vmax_fltr, dmin_fltr, dmax_fltr, cyl_rad):
    """Plot a correlation-type map and project it onto the protein for VMD and PyMol.

    The function loads the matrix, plots the overall map and the distance
    distribution, plots intra/inter chain maps when there are several chains,
    filters the matrix by distance, and runs the VMD and PyMol projections.

    Parameters
    ----------
    inp_file : path of the data file to read.
    out_file : output name handed to every plotting and projection helper.
    sel_type : data type, case-insensitive; one of ndcc, dcc, absndcc, lmi,
        nlmi, coeviz, evcouplings, generic or eg.
    atom_group : structure; only ``'protein and name CA'`` atoms are used.
    vmin_fltr, vmax_fltr : value filters for the projections; ``None`` means
        the corresponding color bar limit.
    dmin_fltr, dmax_fltr : distance filter bounds, converted with ``float``.
    cyl_rad : cylinder radius for both projections; ``None`` selects the
        built-in defaults (0.02 for evcouplings, otherwise 0.5 for VMD and
        0.3 for PyMol).

    Exits via ``sys.exit(-1)`` for an unknown data type, or when an ndcc or
    nlmi map has a maximum above 1.00001. Raises ``ValueError`` when a file
    that has to be probed for the sparse format is empty.
    """
    ##########################################################################
    selectedAtoms = atom_group.select('protein and name CA')
    ##########################################################################
    data_type = sel_type.lower()

    if data_type not in _VISUALIZEMAP_DISTRIBUTION_SETTINGS:
        print(
            "@> ERROR: Unknown data type: Type can only be ndcc, absndcc, lmi,\n"
        )
        print(
            "@>        coeviz or evcouplings. If you have your data in full \n"
        )
        print(
            "@>        matrix format and your data type is none of the options\n"
        )
        print("@>        mentionned, you can set data type 'generic'.\n")
        sys.exit(-1)

    # Read data file and assign to a numpy array
    ccMatrix = _visualizemap_load_matrix(data_type, inp_file, selectedAtoms)
    minColorBarLimit, maxColorBarLimit = _visualizemap_color_limits(
        data_type, ccMatrix)

    # Set vmin_fltr and vmax_fltr
    if vmin_fltr is None:
        vmin_fltr = minColorBarLimit
    if vmax_fltr is None:
        vmax_fltr = maxColorBarLimit

    print(f"""@> Min. value filter: {vmin_fltr}""")
    print(f"""@> Max. value filter: {vmax_fltr}""")

    ##########################################################################
    # Call overall correlation calculation

    overallCorrelationMap(ccMatrix, minColorBarLimit, maxColorBarLimit,
                          out_file, " ", selectedAtoms)

    if cyl_rad is None:
        if data_type == "evcouplings":
            VMDcylinderRadiusScale = 0.02
            PMLcylinderRadiusScale = 0.02
        else:
            VMDcylinderRadiusScale = 0.5
            PMLcylinderRadiusScale = 0.3
        print(f"""@> VMD Cylinder radius: {VMDcylinderRadiusScale}""")
        print(f"""@> PyMol Cylinder radius: {PMLcylinderRadiusScale}""")
    else:
        VMDcylinderRadiusScale = float(cyl_rad)
        PMLcylinderRadiusScale = float(cyl_rad)
        print(f"""@> Cylinder radius: {cyl_rad}""")

    # Unknown types already exited above, so the lookup cannot fail here.
    label, use_absolute, write_all = \
        _VISUALIZEMAP_DISTRIBUTION_SETTINGS[data_type]
    distanceDistribution(ccMatrix,
                         out_file,
                         label,
                         selectedAtoms,
                         absoluteValues=use_absolute,
                         writeAllOutput=write_all)

    ##########################################################################
    # Check number of chains. If there are multiple chains, plot inter and
    # intra chain correlations
    saveMatrix = False
    if len(set(selectedAtoms.getChids())) > 1:
        intraChainCorrelationMaps(ccMatrix, minColorBarLimit, maxColorBarLimit,
                                  out_file, " ", selectedAtoms, saveMatrix)
        interChainCorrelationMaps(ccMatrix, minColorBarLimit, maxColorBarLimit,
                                  out_file, " ", selectedAtoms, saveMatrix)

    # Here, we can filter some correlation values closer than a distance.
    # Typically, it is supposed to filter out the correlation within the
    # same secondary structure etc.
    ccMatrix = filterCorrelationMapByDistance(ccMatrix,
                                              out_file,
                                              " ",
                                              selectedAtoms,
                                              float(dmin_fltr),
                                              float(dmax_fltr),
                                              absoluteValues=False,
                                              writeAllOutput=False)

    # Overall projection
    projectCorrelationsOntoProteinVMD(
        out_file,
        ccMatrix,
        out_file,
        selectedAtoms,
        vminFilter=float(vmin_fltr),
        vmaxFilter=float(vmax_fltr),
        cylinderRadiusScaler=VMDcylinderRadiusScale,
        absoluteValues=True,
        writeAllOutput=True)

    projectCorrelationsOntoProteinPyMol(
        out_file,
        ccMatrix,
        out_file,
        selectedAtoms,
        vminFilter=float(vmin_fltr),
        vmaxFilter=float(vmax_fltr),
        cylinderRadiusScaler=PMLcylinderRadiusScale,
        absoluteValues=True,
        writeAllOutput=True)