def get_side_chain_vector(residue):
    """
    Compute a residue's side chain direction as seen from its C-alpha atom.

    The direction is the mean of the unit vectors pointing from the C-alpha
    atom to every atom located after the first four entries of the residue's
    unpacked atom list (presumably the backbone atoms N, CA, C, O - confirm
    against the atom ordering of the input). When no such atoms exist, e.g.
    for glycine, the negated C-alpha->C and C-alpha->N unit vectors are
    averaged instead.

    Returns a tuple (C-alpha coordinate Vector, mean direction Vector). The
    second element is a mean of unit vectors and is not re-normalised.
    Returns None when the residue is not an amino acid, has no CA atom, or
    has neither side chain atoms nor both N and C atoms.
    """
    if not (is_aa(residue) and residue.has_id('CA')):
        return None

    ca_coord = residue['CA'].get_coord()
    offsets = np.array(
        [atom.get_coord() for atom in residue.get_unpacked_list()[4:]])

    use_backbone = len(offsets) < 1
    if use_backbone:
        if not (residue.has_id('N') and residue.has_id('C')):
            return None
        offsets = np.array(
            [residue['C'].get_coord(), residue['N'].get_coord()])

    offsets = offsets - ca_coord
    if use_backbone:
        # Point away from the backbone neighbours rather than towards them.
        offsets = -offsets

    lengths = np.sum(abs(offsets)**2, axis=-1)**(1. / 2)
    unit_vectors = offsets / lengths[:, np.newaxis]
    return (Vector(ca_coord), Vector(unit_vectors.mean(axis=0)))
Пример #2
0
def COM(object):
    """
    Return the mass-weighted centre of the atoms in ``object``.

    ``object`` is any entity exposing ``get_atoms()`` whose atoms provide
    ``get_vector()`` and a ``mass`` attribute. The result is a Vector.

    The sum and the division are done on plain numpy arrays instead of
    calling ``Vector.__div__`` directly: that method is the Python 2 division
    hook and is not available on every Biopython release.

    If ``object`` yields no atoms the total mass is zero and the returned
    Vector holds NaN components (numpy emits a RuntimeWarning).
    """
    weighted_sum = np.zeros(3)
    total_mass = 0.0
    for atom in object.get_atoms():
        weighted_sum += atom.get_vector().get_array() * atom.mass
        total_mass += atom.mass
    return Vector(weighted_sum / total_mass)
Пример #3
0
 def test_transform(self):
     """Check transform() on the entities held in self.s, self.m, self.c, self.r and self.a.

     Each entity is rotated and translated, and the first three components
     of its new position are compared with dot(old position, rotation) +
     translation.
     """
     entities = (self.s, self.m, self.c, self.r, self.a)
     for entity in entities:
         rotation = rotmat(Vector(1, 3, 5), Vector(1, 0, 0))
         translation = numpy.array((2.4, 0, 1), 'f')
         before = self.get_pos(entity)
         entity.transform(rotation, translation)
         after = self.get_pos(entity)
         expected = numpy.dot(before, rotation) + translation
         for axis in range(3):
             self.assertAlmostEqual(after[axis], expected[axis])
Пример #4
0
def calculateCoordinates(refA, refB, refC, L, ang, di):
	"""
	Compute the position of a new point D from three reference atoms.

	refA, refB, refC -- objects exposing get_vector(); their positions are
	                    called A, B and C in the comments below.
	L   -- length used for the C->D displacement (presumably the C-D bond
	       length; confirm against callers).
	ang -- angle in degrees; it is converted to radians and its cosine
	       fixes the dot product between C->B and C->D.
	di  -- requested A-B-C-D dihedral in degrees.

	A first solution for D is obtained in closed form, its A-B-C-D dihedral
	is measured with calc_dihedral, and D is then rotated about the B-C
	axis by the difference to the requested dihedral.

	Returns the resulting position D as a Vector.

	NOTE(review): math.sqrt raises ValueError if one of the radicands below
	is negative; the code does not guard against that.
	"""
	AV=refA.get_vector(); BV=refB.get_vector(); CV=refC.get_vector()
	# Positions of A and B relative to C.
	CA=AV-CV; CB=BV-CV
	# Components of the C->A vector.
	AX=CA[0]; AY=CA[1]; AZ=CA[2]
	# Components of the C->B vector.
	BX=CB[0]; BY=CB[1]; BZ=CB[2]
	# Components of the cross product CA x CB, i.e. the normal of the plane
	# through the three reference points.
	A=(AY*BZ)-(AZ*BY); B=(AZ*BX)-(AX*BZ); G=(AX*BY)-(AY*BX)
	# |CB| * L * cos(ang): the value of the dot product between CB and the
	# new C->D vector (ang is converted from degrees to radians).
	F= math.sqrt(BX*BX + BY*BY + BZ*BZ) * L * math.cos(ang*(math.pi/180.0))
	# Square-root term and denominator shared by the X, Y and Z expressions.
	const=math.sqrt( math.pow((B*BZ-BY*G),2) *(-(F*F)*(A*A+B*B+G*G)+(B*B*(BX*BX+BZ*BZ) + A*A*(BY*BY+BZ*BZ)- (2*A*BX*BZ*G) + (BX*BX+ BY*BY)*G*G - (2*B*BY)*(A*BX+BZ*G))*L*L))
	denom= (B*B)*(BX*BX+BZ*BZ)+ (A*A)*(BY*BY+BZ*BZ) - (2*A*BX*BZ*G) + (BX*BX+BY*BY)*(G*G) - (2*B*BY)*(A*BX+BZ*G)
	X= ((B*B*BX*F)-(A*B*BY*F)+(F*G)*(-A*BZ+BX*G)+const)/denom
	# When this condition holds B*BZ-BY*G is zero, so the general Y and Z
	# expressions in the else branch would divide by zero; a separate
	# formula is used instead.
	if((B==0 or BZ==0) and (BY==0 or G==0)):
		const1=math.sqrt( G*G*(-A*A*X*X+(B*B+G*G)*(L-X)*(L+X)))
		Y= ((-A*B*X)+const1)/(B*B+G*G)
		Z= -(A*G*G*X+B*const1)/(G*(B*B+G*G))
	else:
		Y= ((A*A*BY*F)*(B*BZ-BY*G)+ G*( -F*math.pow(B*BZ-BY*G,2) + BX*const) - A*( B*B*BX*BZ*F- B*BX*BY*F*G + BZ*const)) / ((B*BZ-BY*G)*denom)
		Z= ((A*A*BZ*F)*(B*BZ-BY*G) + (B*F)*math.pow(B*BZ-BY*G,2) + (A*BX*F*G)*(-B*BZ+BY*G) - B*BX*const + A*BY*const) / ((B*BZ-BY*G)*denom)
	# (X, Y, Z) is relative to C; shift it back to absolute coordinates.
	D=Vector(X, Y, Z) + CV
	with warnings.catch_warnings():
		# Silence warnings raised while measuring the dihedral.
		warnings.simplefilter("ignore")
		# Dihedral A-B-C-D of the first solution, in degrees.
		temp=calc_dihedral(AV, BV, CV, D)*(180.0/math.pi)
	# Remaining rotation (degrees) needed to reach the requested dihedral.
	di=di-temp
	# Rotate D about the CV-BV axis, using B as the pivot point.
	rot= rotaxis(math.pi*(di/180.0), CV-BV)
	D=(D-BV).left_multiply(rot)+BV
	return D
Пример #5
0
    def dihedral_calcul(self, others):
        """Score every residue by the spread of its C-CA-CB-CG dihedral.

        For each residue index of ``self.res`` the C-CA-CB-CG dihedral (in
        degrees) is computed in this model and in every model of ``others``;
        ``stat_ecart_type`` of those angles is stored in
        ``self.angle_dihedres``. Residues for which the computation fails
        (for instance because an atom is missing) get the fallback value
        0.01.

        The scores are plotted to "angle_dihedres.png", rescaled to the
        range 0-100 and handed to ``set_angles_dihedres`` /
        ``create_bfactor_file`` of every model, this one included. When all
        scores are equal the rescaled values are all 0.0.

        ``others`` is not modified.
        """
        # Work on a new list: appending to ``others`` itself would leak
        # ``self`` into the caller's list.
        models = list(others) + [self]

        self.angle_dihedres = []
        for n in range(len(self.res)):
            try:
                angles = []
                for model in models:
                    residue = model.get_res()[n]
                    angles.append(
                        calc_dihedral(
                            Vector(residue['C'].get_coord()),
                            Vector(residue['CA'].get_coord()),
                            Vector(residue['CB'].get_coord()),
                            Vector(residue['CG'].get_coord())) /
                        math.pi * 180)
                ecart_type = stat_ecart_type(angles)
            except Exception:
                # Best effort: residues without the four atoms (or any other
                # per-residue failure) receive a small default spread.
                ecart_type = 0.01
            self.angle_dihedres.append(ecart_type)

        plot(list(range(1, len(self.angle_dihedres) + 1)),
             self.angle_dihedres)
        savefig("angle_dihedres.png")
        clf()

        mini = min(self.angle_dihedres)
        maxi = max(self.angle_dihedres)
        spread = maxi - mini
        if spread == 0:
            # Identical scores everywhere: avoid dividing by zero.
            bfactor_angle = [0.0 for _ in self.angle_dihedres]
        else:
            bfactor_angle = [(var - mini) * 100 / spread
                             for var in self.angle_dihedres]
        assert max(
            bfactor_angle
        ) <= 100, "maximum de bfactor trop haut apres normalisation: " + str(
            max(bfactor_angle))
        assert min(
            bfactor_angle
        ) >= 0, "minimum de bfactor trop bas apres normalisation: " + str(
            min(bfactor_angle))

        for model in models:
            model.set_angles_dihedres(self.angle_dihedres)
            model.create_bfactor_file(bfactor_angle, "_dihedre")
Пример #6
0
def add_frame(img):
    """
    Read the next frame from the global ``infile`` and add its inter-chain
    distances to ``img``.

    Lines are consumed until one starting with 'ITEM: ATOMS' is found; the
    following lines are processed until a line starting with 'ITEM:' or an
    empty line is met, or the iterator is exhausted (in which case the global
    ``end`` is set to True).

    Only lines whose second field equals '1' are used. The last three fields
    are read as coordinates, multiplied by 400 and shifted by -200
    (presumably converting scaled coordinates to box coordinates - confirm
    against the input format). A line goes to the first chain when its first
    field, read as an integer, is <= the global ``chain_boundary`` and to the
    second chain otherwise.

    ``img[i][j]`` is incremented by the distance between atom i of the first
    chain and atom j of the second chain. Nothing is returned.
    """
    global infile
    global chain_boundary
    global end
    first_chain = []
    second_chain = []

    # Skip everything up to and including the atoms header.
    line = ''
    while line[:11] != 'ITEM: ATOMS':
        line = next(infile)
    line = next(infile)

    while line[:5] not in ['ITEM:', '\n']:
        fields = line.split()
        if fields[1] == '1':
            if int(fields[0]) <= chain_boundary:
                target = first_chain
            else:
                target = second_chain
            position = Vector(fields[-3:])
            position = Vector(
                (position._ar * np.array(400.0)) + np.array(-200.0))
            target.append(position)

        try:
            line = next(infile)
        except StopIteration:
            end = True
            break

    for i, atom1 in enumerate(first_chain):
        for j, atom2 in enumerate(second_chain):
            img[i][j] += (atom1 - atom2).norm()
Пример #7
0
    def center(self):
        """
        Centroid of the pocket.

        Computed by ``self.center_of_mass`` (called with ``geometric=False``)
        over the non-None entries of the "ca.atom" column of
        ``self.ca_atoms``.

        Returns
        -------
        Bio.PDB.vectors.Vector
            Coordinates of the pocket centroid.
        """

        present = [
            entry for entry in self.ca_atoms["ca.atom"].to_list()
            if entry is not None
        ]
        return Vector(self.center_of_mass(present, geometric=False))
Пример #8
0
 def __init__(self, symbol, name, atomid, coords, bfactor, load_json=False):
     """Initialise the atom.

     With ``load_json`` set, every attribute is created as None so it can be
     filled in afterwards and the other arguments are ignored. Otherwise the
     attributes are taken from the arguments: ``symbol`` is capitalised,
     ``mc_sc`` is True for the atom names CA, C, N and O, ``atomic_mass`` is
     looked up in (and cached into) ``element_mass``, and ``vector`` is built
     from the first three entries of ``coords``.
     """
     if load_json:
         self.symbol = None
         self.name = None
         self.atomid = None
         self.coords = None
         self.mc_sc = None
         self.atomic_mass = None
         self.vector = None
         self.bfactor = None
         return

     self.symbol = symbol.capitalize()
     self.name = name
     self.atomid = atomid
     self.coords = coords  # stored as received
     self.mc_sc = self.name in ("CA", "C", "N", "O")
     # Look the mass up once per element symbol and remember it.
     if element_mass.get(self.symbol) is None:
         element_mass[self.symbol] = element(self.symbol).atomic_weight
     self.atomic_mass = element_mass[self.symbol]
     x_coord, y_coord, z_coord = (self.coords[0], self.coords[1],
                                  self.coords[2])
     self.vector = Vector(x=x_coord, y=y_coord, z=z_coord)
     self.bfactor = bfactor
Пример #9
0
def _mass_weighted_centre(entity):
    """Return the mass-weighted centre of the atoms of *entity* as a Vector."""
    weighted_sum = np.zeros(3)
    total_mass = 0.0
    for atom in entity.get_atoms():
        weighted_sum += atom.get_vector().get_array() * atom.mass
        total_mass += atom.mass
    return Vector(weighted_sum / total_mass)


def _fit_onto(reference_chain, chain):
    """Return the (rotation, translation) that fits *chain* onto *reference_chain*."""
    reference_set = np.asarray(
        [atom.get_coord() for atom in reference_chain.get_atoms()])
    coordinate_set = np.asarray(
        [atom.get_coord() for atom in chain.get_atoms()])
    sup = SVDSuperimposer()
    sup.set(reference_set, coordinate_set)
    sup.run()
    return sup.get_rotran()


def analyse(input_file_name,
            refer_file_name,
            moved_chain_id,
            fixed_chain_id,
            r_moved_chain_id,
            r_fixed_chain_id,
            output_file1,
            output_file2,
            r_model_number=0):
    """
    Describe, for every model of a structure, where and how the moved chain
    sits relative to the fixed chain, and write the results to two files.

    For each model of ``input_file_name``:

    * the whole model is transformed so that its fixed chain fits the fixed
      chain of the reference (model ``r_model_number`` of
      ``refer_file_name``);
    * the mass-weighted centres of the moved and fixed chains are computed
      in that frame (a message is printed when the fixed centre is more than
      0.5 away from the origin);
    * the rotation fitting the moved chain onto the reference moved chain is
      determined.

    ``output_file1`` receives one tab-separated line per model: model index,
    distance between the two centres, theta (angle between the moved centre
    and the z axis), phi (arctan2(y, x) of the moved centre) and three
    angles derived from the rotation matrix. theta and phi are written as
    0.0 when they are undefined (moved centre at the origin, respectively on
    the z axis), so every line always has the same columns.

    ``output_file2`` receives one line per model: model index, the x, y, z of
    the moved centre and the nine rotation matrix entries in the order
    R[0][0], R[1][0], R[2][0], R[0][1], ... Every value is followed by a tab.

    Nothing is returned.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('to_analyse',
                                                      input_file_name)
    reference = PDBParser(PERMISSIVE=1).get_structure('reference',
                                                      refer_file_name)

    r_chain_moved = reference[r_model_number][r_moved_chain_id]
    r_chain_fixed = reference[r_model_number][r_fixed_chain_id]

    z_axis = Vector(0, 0, 1)
    lines1 = []
    lines2 = []

    for model_number, model in enumerate(structure):
        chain_moved = model[moved_chain_id]
        chain_fixed = model[fixed_chain_id]

        # Bring the whole model into the frame of the reference fixed chain.
        rotation, translation = _fit_onto(r_chain_fixed, chain_fixed)
        for atom in model.get_atoms():
            atom.transform(rotation, translation)

        moved_centre = _mass_weighted_centre(chain_moved)
        fixed_centre = _mass_weighted_centre(chain_fixed)
        if fixed_centre.norm() > 0.5:
            print("Fixed chain norm is " + str(fixed_centre.norm()) +
                  " in model " + str(model_number) +
                  ". Should have been at the origin. Check code...")

        x, y, z = moved_centre.get_array()
        distance = (moved_centre - fixed_centre).norm()

        # Spherical angles of the moved centre; 0.0 stands in for angles
        # that are undefined so that every frame yields a value.
        theta = 0.0
        phi = 0.0
        if moved_centre.norm() > 1e-6:
            theta = moved_centre.angle(z_axis)
            if np.sqrt(x * x + y * y) > 1e-6:
                phi = np.arctan2(y, x)

        # Orientation of the moved chain relative to the reference one.
        R, _unused_translation = _fit_onto(r_chain_moved, chain_moved)
        theta_x = np.arctan2(R[2][1], R[2][2])
        theta_y = np.arctan2(
            -R[2][0], np.sqrt(R[2][1] * R[2][1] + R[2][2] * R[2][2]))
        theta_z = np.arctan2(R[1][0], R[0][0])

        lines1.append('\t'.join(
            str(value) for value in (model_number, distance, theta, phi,
                                     theta_x, theta_y, theta_z)) + '\n')

        row2 = [model_number, x, y, z]
        for column in range(3):
            row2.extend((R[0][column], R[1][column], R[2][column]))
        lines2.append('\t'.join(str(value) for value in row2) + '\t\n')

    with open(output_file1, "w") as f_results1:
        f_results1.writelines(lines1)
    with open(output_file2, "w") as f_results2:
        f_results2.writelines(lines2)
Пример #10
0
def cross(v1, v2):
    """Return the cross product v1 x v2 as a Vector.

    Both arguments only need to support indexing with 0, 1 and 2.
    """
    a0, a1, a2 = v1[0], v1[1], v1[2]
    b0, b1, b2 = v2[0], v2[1], v2[2]
    return Vector(a1 * b2 - a2 * b1,
                  a2 * b0 - a0 * b2,
                  a0 * b1 - a1 * b0)
Пример #11
0
def to_vector(t):
    """Build a Vector from the first three items of the indexable *t*."""
    x, y, z = t[0], t[1], t[2]
    return Vector(x, y, z)
Пример #12
0
def find_geometry(metals,
                  structure,
                  permissive=False,
                  all_metals=False,
                  external=None):
    """
    Build constraint strings between metals and the atoms coordinating them.

    Parameters
    ----------
    metals : iterable
        Items indexable as (atom, residue, chain) describing each metal.
    structure
        Structure that is searched for atoms close to each metal.
    permissive : bool
        Retry the angle classification in permissive mode when the strict
        classification finds no geometry.
    all_metals : bool
        When no geometry is found, constrain every contact atom instead of
        raising.
    external : iterable of str, optional
        Existing constraint strings; a metal whose "chain:resnum:name" string
        occurs in any of them is skipped. ``None`` means no such strings.

    Returns
    -------
    list of str
        De-duplicated strings formatted as
        "spring-distance-chain:resnum:atom-chain:resnum:atom". Their order is
        not defined because duplicates are removed through a set.

    Raises
    ------
    ce.NoGeometryAroundMetal
        When no geometry can be determined and ``all_metals`` is False.
    """
    found_msg = "Found {} geometry around {} (residue {}). Adding constraints."
    found_all_msg = "Found {} geometry around {} (residue {}). Adding constraints to all atoms within {}A of the metal."
    failed_msg = "Failed to determine geometry around {} (residue {}). Add constraints manually or set 'constrain_all_metals: true' to constrain all atoms within {}A of the metal."
    none_msg = "No atoms coordinated to {} (residue {})."

    # The default used to be iterated directly, which failed for None.
    external = [] if external is None else external

    output = []
    checked_metals = []
    # Built on first use and shared by all metals: the atom set is the same
    # for every query.
    neighbor_search = None

    for metal in metals:

        # search distance based on metal type
        if metal[0].element == "YB":
            dist = 3.5
        elif metal[0].element == "K":
            dist = 3.3
        else:
            dist = 2.9

        metal_name = metal[0].name
        metal_resnum = metal[1].get_id()[1]
        metal_str = "{}:{}:{}".format(metal[2].id, metal_resnum, metal_name)
        in_ext = any(metal_str in entry for entry in external)

        if in_ext or list(metal[0].coord) in checked_metals:
            continue

        coords = metal[0].coord

        if neighbor_search is None:
            neighbor_search = NeighborSearch(
                Selection.unfold_entities(structure, "A"))

        # The neighbours of the metal do not depend on the residue being
        # visited, so they are looked up once per metal.
        # exclude self-contacts, carbons and hydrogens
        excluded_contacts = cs.metals + ['C', 'H']
        contacts_atoms = [
            c for c in neighbor_search.search(coords, dist, "A")
            if c.element not in excluded_contacts
        ]

        # Attach the owning residue and chain to every contact atom, in
        # chain / residue order.
        contacts = []
        for chain in structure.get_chains():
            for residue in chain.get_residues():
                for atom in contacts_atoms:
                    if atom in residue.get_atoms():
                        contacts.append([atom, residue, chain])

        combinations = [list(c) for c in itertools.combinations(contacts, 2)]

        # get all atom - metal - atom angles
        for c in combinations:
            vi = Vector(c[0][0].coord)
            vj = Vector(c[1][0].coord)
            angle = vectors.calc_angle(vi, coords, vj) * 180 / np.pi
            c.append(angle)

        geo, coordinated_atoms = angle_classification(combinations, False)

        if geo is None and permissive:
            geo, coordinated_atoms = angle_classification(combinations, True)

            if geo is None and all_metals and combinations:

                geo, coordinated_atoms = angle_classification(
                    combinations, True)
                if geo:
                    print(found_msg.format(geo, metal_name, metal_resnum))
                    checked_metals.append(list(metal[0].coord))
                else:
                    coordinated_atoms = combinations
                    checked_metals.append(list(metal[0].coord))
                    geo = "no"
                    print(
                        found_all_msg.format(geo, metal_name, metal_resnum,
                                             dist))

            elif geo is None and not all_metals:
                raise ce.NoGeometryAroundMetal(
                    failed_msg.format(metal_name, metal_resnum, dist))

            elif geo is None and all_metals and not combinations:
                print(none_msg.format(metal_name, metal_resnum))

            elif geo:
                checked_metals.append(list(metal[0].coord))
                print(found_msg.format(geo, metal_name, metal_resnum))

        elif geo is None and all_metals and combinations:
            geo, coordinated_atoms = angle_classification(combinations, True)

            if geo is None:
                geo = "no"
                coordinated_atoms = combinations
                checked_metals.append(list(metal[0].coord))
                print(
                    found_all_msg.format(geo, metal_name, metal_resnum, dist))

            else:
                print(found_msg.format(geo, metal_name, metal_resnum))

        elif geo is None and all_metals and not combinations:
            print(none_msg.format(metal_name, metal_resnum))

        elif geo is None and not all_metals and not permissive:
            raise ce.NoGeometryAroundMetal(
                failed_msg.format(metal_name, metal_resnum, dist))

        else:
            checked_metals.append(list(metal[0].coord))
            print(found_msg.format(geo, metal_name, metal_resnum))

        # format string
        yaml_string = "{}-{}-{}:{}:{}-{}:{}:{}"
        spring_const = 50

        # Unique [atom, residue, chain] entries taking part in the geometry.
        string_atoms = []
        for c in coordinated_atoms:
            atom1, atom2, angle = c

            if atom1 not in string_atoms:
                string_atoms.append(atom1)
            if atom2 not in string_atoms:
                string_atoms.append(atom2)

        for atom in string_atoms:
            atomname1 = atom[0].name
            resnum1 = atom[1].get_id()[1]
            chain1 = atom[2].get_id()

            chain2 = metal[2].get_id()

            # Subtracting two atoms yields their distance.
            atom_dist = atom[0] - metal[0]
            output.append(
                yaml_string.format(spring_const, atom_dist, chain1, resnum1,
                                   atomname1, chain2, metal_resnum,
                                   metal_name))

        output = list(set(output))

    return output
Пример #13
0
def _water_contacts(searcher, water_list):
    """Return the non-water atoms found within 5.0 of any atom in *water_list*."""
    found = []
    for water_atom in water_list:
        found = found + searcher.search(water_atom.coord, 5.0, "A")
    return [c for c in found if c not in water_list]  # exclude "self" contacts


def add_water(refinement_input, ligand_chain, n_waters=2, test=False):
    """
    Insert ``n_waters`` water molecules into every PDB file of
    ``refinement_input``, rewriting the files in place.

    Parameters
    ----------
    refinement_input : list of str
        Paths of the PDB files to modify. Residue and atom numbering for the
        new waters is derived from the first file.
    ligand_chain : str
        Residue name identifying the ligand lines (compared with columns
        18-20 of each record).
    n_waters : int
        Number of water molecules to add to each file.
    test : bool
        Seed numpy's random generator with 42 so the initial water positions
        are reproducible.

    Returns
    -------
    list of str or None
        The paths of the rewritten files, or None when ``n_waters`` < 1.

    Notes
    -----
    Waters start at random integer positions in [0, 100) and are shifted by
    -5 along x until no non-water atom lies within 5.0 of any water atom.
    Each output file contains the protein records, the waters and finally the
    ligand records. A temporary "<name>_temp.pdb" file is written next to
    each input and removed afterwards.
    """
    if test:
        np.random.seed(42)

    if n_waters < 1:
        return None

    output = []
    n_inputs = len(refinement_input)
    water_coords = []
    resnums = []
    atomnums = []
    chains = []
    resnames = []

    # get maximum residue and atom numbers
    with open(refinement_input[0], "r") as file:
        for line in file:
            if line.startswith(("ATOM", "HETATM", "TER")):
                try:
                    # PDB columns 23-26 (residue number) and 7-11 (serial).
                    resnums.append(line[22:26].strip())
                    atomnums.append(line[6:11].strip())
                    chains.append(line[21])
                    resnames.append(line[17:20])
                except IndexError:
                    # Record too short to carry a chain id (e.g. a bare
                    # "TER"): keep what was collected and move on.
                    continue
    lig_length = resnames.count(ligand_chain)
    resnums = [int(num) for num in resnums if num]
    max_resnum = max(resnums)
    water_resnums = []
    water_chain = chains[0]  # water chain = 1st protein chain
    atomnum = max([int(num) for num in atomnums if num]) + 1 + lig_length

    water = cs.water * n_waters * n_inputs

    for _input_index in range(n_inputs):
        for _water_index in range(n_waters):
            O_coords = Vector([np.random.randint(0, 100) for i in range(3)])
            H1_coords = O_coords + Vector(0.757, 0.586, 0.0)
            H2_coords = O_coords + Vector(-0.757, 0.586, 0.0)
            water_coords = water_coords + [list(O_coords)] + [
                list(H1_coords)
            ] + [list(H2_coords)]

            max_resnum += 1  # each water must have a different residue number
            water_resnums = water_resnums + [max_resnum] * 3
        max_resnum += 1

    water_atomnums = [atomnum + j for j in range(n_waters * 3 * n_inputs)]

    # PDB lines - water
    water_output = []
    for atom, num, resnum, coord in zip(water, water_atomnums, water_resnums,
                                        water_coords):
        coord = " ".join("{:7.4f}".format(c) for c in coord)
        water_output.append(atom.format(num, water_chain, resnum, coord))

    # One slice of water lines per input file.
    sliced_water_output = [
        water_output[i:i + n_waters * 3]
        for i in range(0, len(water_output), n_waters * 3)
    ]

    # loop over minimisation inputs
    for pdb_path, water_lines in zip(refinement_input, sliced_water_output):
        protein = []
        ligand = []

        # read in protein and ligand lines
        with open(pdb_path, "r") as handle:
            for line in handle:
                if line.startswith(("ATOM", "HETATM")):
                    if line[17:20].strip() == ligand_chain:
                        ligand.append(line)
                    else:
                        protein.append(line)

        # add water to PDB
        with open(pdb_path, "w+") as file:
            file.writelines(protein)
            file.write("\n")
            file.writelines(water_lines)
            file.write("\n")
            file.writelines(ligand)

        # load again with Biopython
        parser = PDBParser()
        structure = parser.get_structure("complex", pdb_path)
        water_list = []
        protein_list = Selection.unfold_entities(structure, "A")
        temp_protein_file = os.path.join(
            os.path.dirname(pdb_path),
            os.path.basename(pdb_path).replace(".pdb", "_temp.pdb"))

        for res in structure.get_residues():
            if res.resname == 'HOH':
                water_list = water_list + Selection.unfold_entities(res, "A")

        # Water atoms are filtered out of every result, and the remaining
        # atoms never move, so a single search structure is enough.
        searcher = NeighborSearch(protein_list)

        # check for water contacts and translate water, if needed
        contacts5 = _water_contacts(searcher, water_list)
        while contacts5:
            for water_atom in water_list:
                x, y, z = water_atom.coord
                water_atom.set_coord(np.array([x - 5, y, z]))
            contacts5 = _water_contacts(searcher, water_list)

        # save final output
        io = PDBIO()
        io.set_structure(structure)
        io.save(temp_protein_file)
        output.append(pdb_path)

        new_water_lines = []
        with open(temp_protein_file, "r") as temp:
            for line in temp:
                if line[17:20].strip() == "HOH":
                    # Rewrite only the serial field (columns 7-11) so the
                    # other columns keep their position.
                    serial = int(line[6:11]) + lig_length
                    line = line[:6] + "{:>5}".format(serial) + line[11:]
                    if line[12:15] == "2HW":
                        line = line + "\nTER\n"
                    new_water_lines.append(line)

        new_water_lines[-2] = new_water_lines[-2].replace("\nTER\n", "")

        with open(pdb_path, "w+") as file:
            file.writelines(protein)
            file.write("\nTER\n")
            file.writelines(new_water_lines)
            file.write("\n")
            file.writelines(ligand)
            file.write("TER")

        os.remove(temp_protein_file)

    return output
Пример #14
0
def create_random_pdb(separation_distance, move_chain_id, fix_chain_id, input_file_name, output_pdb_name, model_number = 0):
    """
    Write a copy of a two-chain structure with both chains randomly
    re-oriented and their centres ``separation_distance`` apart.

    The coordinates are first shifted so that the centre of mass of the fixed
    chain is at the origin. The moved chain is then rotated about its own
    centre with one matrix from ``generate_3d()`` and placed at
    ``separation_distance`` along its original direction from the origin; the
    fixed chain is rotated about the origin with another matrix from
    ``generate_3d()``. The structure is saved to ``output_pdb_name``.

    Returns a dict with the input and output separations, the centres of
    mass before and after, the requested separation and "Max_distance", the
    largest distance of any repositioned atom from the origin.

    Atom coordinates are stored as numpy arrays (not Vector objects), which
    is the type Bio.PDB atoms hold their coordinates in.
    """
    results = {}
    structure = PDBParser(PERMISSIVE=1).get_structure('whatever', input_file_name)
    chain_moved = structure[model_number][move_chain_id]
    chain_fixed = structure[model_number][fix_chain_id]

    old_fixed_centre = COM(chain_fixed)
    old_moved_centre = COM(chain_moved)

    # Move the origin to the centre of mass of the fixed chain.
    for atom in chain_moved.get_atoms():
        atom.set_coord((atom.get_vector() - old_fixed_centre).get_array())
    for atom in chain_fixed.get_atoms():
        atom.set_coord((atom.get_vector() - old_fixed_centre).get_array())

    moved_centre = old_moved_centre - old_fixed_centre

    results["1_Input_Separation"] = (old_fixed_centre - old_moved_centre).norm()
    results["1_Old_fixed_chain_com"] = old_fixed_centre
    results["1_Old_moved_chain_com"] = old_moved_centre
    results["0_Intended_Output_Separation"] = separation_distance

    R1 = generate_3d()
    R2 = generate_3d()

    max_distance = 0.0

    # Where the centre of the moved chain ends up: same direction as before,
    # at the requested distance from the origin.
    target_centre = Vector(
        moved_centre.normalized().get_array() * separation_distance)

    # Rotate the moved chain about its own centre and put it at the target.
    weighted_sum = np.zeros(3)
    total_mass = 0.0
    for atom in chain_moved.get_atoms():
        new_position = (atom.get_vector() - moved_centre).left_multiply(R2) + target_centre
        atom.set_coord(new_position.get_array())
        max_distance = max(max_distance, new_position.norm())
        weighted_sum += new_position.get_array() * atom.mass
        total_mass += atom.mass
    final_moved_centre = Vector(weighted_sum / total_mass)

    # Rotate the fixed chain about the origin.
    weighted_sum = np.zeros(3)
    total_mass = 0.0
    for atom in chain_fixed.get_atoms():
        new_position = atom.get_vector().left_multiply(R1)
        atom.set_coord(new_position.get_array())
        max_distance = max(max_distance, new_position.norm())
        weighted_sum += new_position.get_array() * atom.mass
        total_mass += atom.mass
    final_fixed_centre = Vector(weighted_sum / total_mass)

    w = PDBIO()
    w.set_structure(structure)
    w.save(output_pdb_name)
    results["2_Output_Separation"] = (final_fixed_centre - final_moved_centre).norm()
    results["2_fixed_chain_com"] = final_fixed_centre
    results["2_moved_chain_com"] = final_moved_centre
    results["Max_distance"] = max_distance
    return results
Пример #15
0
# Placeholder used when a residue has no DSSP entry: empty amino-acid code,
# '-' secondary structure and twelve zeroed numeric fields.
_MISSING_DSSP_FEATURES = ('', '-') + (0.0,) * 12

# Depth substituted whenever a depth helper returns None.
_FALLBACK_DEPTH = 5.0


def _lookup_dssp_features(dssp_map, res_key):
    """Return the DSSP entry for ``res_key`` or a neutral placeholder.

    ``res_key`` is ``(chain_id, residue_id)``.  It is tried as given, then
    with a blank hetero flag and insertion code, and finally against the
    first entry sharing its chain id and residue sequence number.
    ``_MISSING_DSSP_FEATURES`` is returned when nothing matches.
    """
    if res_key in dssp_map:
        return dssp_map[res_key]

    chain_id = res_key[0]
    resseq = res_key[1][1]
    blank_key = (chain_id, (' ', resseq, ' '))
    if blank_key in dssp_map:
        return dssp_map[blank_key]

    for candidate in dssp_map:
        if candidate[0] == chain_id and candidate[1][1] == resseq:
            return dssp_map[candidate]
    return _MISSING_DSSP_FEATURES


def generate_node_features(protein_chains,
                           surface,
                           ns: NeighborSearch,
                           only_ca=Constants.GET_ONLY_CA_ATOMS):
    """Attach per-atom node features as attributes on the atoms of the chains.

    For every residue the function gathers its DSSP record, the names of the
    preceding and following residues in the chain, depth values obtained from
    ``min_dist`` / ``residue_depth``, CA/CB/surface angles and neighbour
    counts from ``ns.search`` for every radius in
    ``Constants.NEIGHBOUR_SUM_RADIUS``.  Each value is stored on the atom with
    ``setattr`` under the attribute name given by
    ``Constants.NODE_APPENDED_FEATURES`` (the DSSP record under
    ``Constants.DSSP_FEATURES_NAME``).

    Args:
        protein_chains: Non-empty sequence of chains.  The last four
            characters of the structure id of the first chain's parent select
            the ``<id>.dssp`` file inside ``Constants.DSSP_PATH``.
        surface: Surface points, indexed with the indices returned by
            ``min_dist`` and ``residue_depth`` (presumably an (N, 3) array --
            TODO confirm).
        ns: Neighbour search object queried as ``ns.search(coord, radius)``.
        only_ca: When true, only the CA atom of each residue is annotated.

    Returns:
        None.  The atoms are modified in place.

    Notes:
        ``res['CA']`` is accessed unconditionally, so every residue is
        expected to contain a CA atom.  A chain without residues is skipped.
    """
    pdb_id = protein_chains[0].get_parent().full_id[0][-4:]
    dssp_map = make_dssp_dict(
        os.path.join(Constants.DSSP_PATH, pdb_id + '.dssp'))[0]

    feature_names = Constants.NODE_APPENDED_FEATURES
    # Upper-case each element symbol once instead of once per neighbour.
    elements = [(symbol, symbol.upper())
                for symbol in Constants.NEIGHBOUR_SUM_RADIUS_ATOMS]

    for chain in protein_chains:
        residues = list(chain.get_residues())
        last_pos = len(residues) - 1

        for pos, res in enumerate(residues):
            prev_res_name = (residues[pos - 1].resname
                             if pos > 0 else Constants.EMPTY_STR_FEATURE)
            next_res_name = (residues[pos + 1].resname if pos < last_pos else
                             Constants.EMPTY_STR_FEATURE)

            dssp_features = _lookup_dssp_features(dssp_map, res.full_id[2:])

            # Residue-level geometry: CA depth and CA/CB/surface angles.
            has_cb = 'CB' in res
            ca_atom = res['CA']
            ca_coord = ca_atom.get_coord()
            ca_d, ca_surf_idx = min_dist(ca_coord, surface)
            ca_vec = ca_atom.get_vector()

            cb_ca_surf_angle = 0
            ca_cb_surf_angle = 0
            if has_cb:
                cb_atom = res['CB']
                cb_vec = cb_atom.get_vector()
                _, cb_surf_idx = min_dist(cb_atom.get_coord(), surface)
                cb_ca_surf_angle = calc_angle(cb_vec, ca_vec,
                                              Vector(surface[ca_surf_idx]))
                ca_cb_surf_angle = calc_angle(ca_vec, cb_vec,
                                              Vector(surface[cb_surf_idx]))

            res_d, dist_list = residue_depth(res, surface)
            if res_d is None:
                res_d = _FALLBACK_DEPTH
                print("Nan values!!!")

            if ca_d is None:
                ca_d = _FALLBACK_DEPTH
                print("Nan values!!!")

            for idx, atom in enumerate(res.get_atoms()):
                if only_ca:
                    # NOTE(review): the features go onto the CA atom, but the
                    # depth / surface index below still come from
                    # dist_list[0], i.e. the first atom yielded by
                    # get_atoms() -- confirm that entry belongs to CA.
                    atom = ca_atom

                atom_d, s_idx = dist_list[idx]
                atom_coord = atom.get_coord()

                offset = atom_coord - ca_coord
                ca_atom_dist = np.sqrt(np.sum(offset * offset))

                # Angles stay 0 for the CA atom itself (zero-length vector).
                atom_ca_surf_angle = 0
                ca_atom_surf_angle = 0
                if not np.array_equal(atom_coord, ca_coord):
                    atom_vec = atom.get_vector()
                    surf_vec = Vector(surface[s_idx])
                    atom_ca_surf_angle = calc_angle(atom_vec, ca_vec, surf_vec)
                    ca_atom_surf_angle = calc_angle(ca_vec, atom_vec, surf_vec)

                if atom_d is None:
                    atom_d = _FALLBACK_DEPTH
                    print(f"Nan valuess!! {atom_d}, {atom}")

                for feature, value in (
                        ('prev_res_name', prev_res_name),
                        ('next_res_name', next_res_name),
                        ('residue_depth', res_d),
                        ('atom_depth', atom_d),
                        ('ca_depth', ca_d),
                        ('ca_atom_dist', ca_atom_dist),
                        ('cb_ca_surf_angle', cb_ca_surf_angle),
                        ('ca_cb_surf_angle', ca_cb_surf_angle),
                        ('atom_ca_surf_angle', atom_ca_surf_angle),
                        ('ca_atom_surf_angle', ca_atom_surf_angle),
                ):
                    setattr(atom, feature_names[feature], value)
                setattr(atom, Constants.DSSP_FEATURES_NAME, dssp_features)

                # Neighbour counts per radius, overall and per element.
                # NOTE(review): the running totals add up the raw counts of
                # *all* previous radii, so from the third radius onwards the
                # stored value is not simply "count(r_k) - count(r_k-1)".
                # Kept as is because downstream consumers may rely on it --
                # confirm the intent.
                total_seen = 0
                above_seen = 0
                element_total_seen = [0] * len(elements)
                element_above_seen = [0] * len(elements)
                for num, radius in enumerate(Constants.NEIGHBOUR_SUM_RADIUS):
                    neighbours = ns.search(atom_coord, radius)
                    n_neighbours = len(neighbours)
                    setattr(
                        atom,
                        feature_names[Constants.neighbour_sum_radius_name(
                            num)], n_neighbours - total_seen)

                    # Direction from the atom to its nearest surface point;
                    # identical for every element at this radius.
                    to_surface = surface[s_idx] - atom_coord
                    n_above = num_of_atoms_above_plane(to_surface, atom_coord,
                                                       neighbours)
                    setattr(
                        atom, feature_names[
                            Constants.neighbour_sum_above_plane_radius_name(
                                num)], n_above - above_seen)
                    total_seen += n_neighbours
                    above_seen += n_above

                    for i, (symbol, symbol_upper) in enumerate(elements):
                        same_element = [
                            a for a in neighbours
                            if a.element.upper() == symbol_upper
                        ]
                        n_same = len(same_element)
                        setattr(
                            atom, feature_names[
                                Constants.neighbour_sum_radius_name(
                                    num, symbol)],
                            n_same - element_total_seen[i])

                        n_same_above = num_of_atoms_above_plane(
                            to_surface, atom_coord, same_element)
                        setattr(
                            atom, feature_names[
                                Constants.
                                neighbour_sum_above_plane_radius_name(
                                    num, symbol)],
                            n_same_above - element_above_seen[i])
                        element_total_seen[i] += n_same
                        element_above_seen[i] += n_same_above

                if only_ca:
                    break
Пример #16
0
    def add_water(self):
        """
        Adds water to the PDB file in a random position, then translates them until there are no clashes.

        Steps performed by the code below:

        1. Read ``self.input_pdbs[0]`` and split its lines into CONECT records
           and everything else (lines containing "END" are dropped).
        2. Collect residue numbers, atom numbers, chain ids and residue names
           from ATOM/HETATM/TER lines; every existing ``HOH`` residue that is
           not in ``self.user_waters`` is added to ``self.water_to_exclude``.
        3. Return ``None`` right away if ``self.n_waters < 1``.
        4. Otherwise build ``self.n_waters`` water molecules per input PDB
           from the ``cs.water`` templates at random integer coordinates,
           write them into each input file, reload the file with Biopython and
           shift the new water atoms by -5 along x until none of them has a
           non-water atom within 5.0 of it.
        5. Append the final water lines to ``added_waters.txt`` (relative to
           the current working directory) and rewrite each input PDB as
           structure lines + added waters + CONECT records.

        Side effects: overwrites every file in ``self.input_pdbs``, appends to
        ``added_waters.txt``, extends ``self.water_to_exclude`` and, when
        ``self.test`` is true, reseeds NumPy's global random generator.

        NOTE(review): ``output`` is filled but not used or returned anywhere in
        the code shown here.
        """
        if self.test:
            # Fixed seed so the random water positions are reproducible.
            np.random.seed(42)

        output = []
        n_inputs = len(self.input_pdbs)
        water_coords = []
        resnums = []
        atomnums = []
        chains = []
        resnames = []

        # Open the original PDB file
        with open(self.input_pdbs[0], "r") as file:
            # Figure out which lines refer to the actual structure and CONECTs, drop everything else
            lines = file.readlines()
            conect = [line for line in lines if "CONECT" in line]
            pdb_lines = [
                line for line in lines
                if "END" not in line and "CONECT" not in line
            ]

            for line in pdb_lines:
                if (line.startswith("ATOM") or line.startswith("HETATM")
                        or line.startswith("TER")):
                    try:
                        # Extract atom information
                        # (slices never raise; only line[21] can raise
                        # IndexError, in which case nothing is appended)
                        resnum = line[22:27].strip()
                        atomnum = line[7:11].strip()
                        chain = line[21]
                        resname = line[17:20]
                        resnums.append(resnum)
                        atomnums.append(atomnum)
                        chains.append(chain)
                        resnames.append(resname)

                        # If there are already waters in the system but were not selected to be perturbed, we exclude
                        # them
                        if resname == "HOH":
                            water = f"{chain}:{resnum}"
                            if (water not in self.user_waters
                                    and water not in self.water_to_exclude):
                                self.water_to_exclude.append(water)
                    # Line too short - Remarks pdb
                    except IndexError:
                        pass

        # Return if no waters are supposed to be added
        if self.n_waters < 1:
            return

        else:
            # Check the maximum existing residue number, so we know where to introduce the waters
            # (lig_length counts the lines whose residue name equals the ligand's,
            # i.e. one per ligand atom line)
            lig_length = resnames.count(self.ligand_residue)
            # Empty strings (e.g. from short TER lines) are dropped before conversion.
            resnums = [int(num) for num in resnums if num]
            max_resnum = max(resnums)
            water_resnums = []

            # Figure out the chain ID and atom numbers to introduce the waters
            water_chain = chains[0]  # water chain = 1st protein chain
            atomnum = max([int(num)
                           for num in atomnums if num]) + 1 + lig_length

            # Enumerate enough water templates to add n_waters to each input
            # (presumably cs.water is a list with one format string per water
            # atom -- TODO confirm against the constants module)
            water = cs.water * self.n_waters * n_inputs
            for input_pdb in range(n_inputs):
                for water_string in range(self.n_waters):
                    # Randomize oxygen coordinates - create an [x, y, z] vector
                    # of integers drawn from [0, 100)
                    O_coords = Vector(
                        [np.random.randint(0, 100) for _ in range(3)])
                    # Add hydrogens to the oxygen
                    H1_coords = O_coords + Vector(0.757, 0.586, 0.0)
                    H2_coords = O_coords + Vector(-0.757, 0.586, 0.0)
                    water_coords = (water_coords + [list(O_coords)] +
                                    [list(H1_coords)] + [list(H2_coords)])
                    # Increment residue number, so each added water has a different one
                    max_resnum += 1
                    water_resnums = water_resnums + [max_resnum] * 3
                # Leaves one unused residue number between the waters of
                # consecutive inputs.
                max_resnum += 1

            # Calculate atom numbers of all waters
            water_atomnums = [
                atomnum + j for j in range(self.n_waters * 3 * n_inputs)
            ]

            # Create water PDB lines based on calculated atom numbers, residues, etc.
            water_output = []
            for atom, num, resnum, coord in zip(water, water_atomnums,
                                                water_resnums, water_coords):
                # Format coordinates, so they fit into the PDB format
                coord = ["{:7.4f}".format(c) for c in coord]
                coord = " ".join(coord)
                water_output.append(
                    atom.format(num, water_chain, resnum, coord))

            # Slice created water PDB lines and split between different input PDBs
            # (three atom lines per water, n_waters waters per input)
            sliced_water_output = []
            for i in range(0, len(water_output), self.n_waters * 3):
                sliced_water_output.append(water_output[i:i +
                                                        self.n_waters * 3])

            # Loop over PDB inputs and their share of the generated water lines
            for input_pdb, water_output in zip(self.input_pdbs,
                                               sliced_water_output):
                new_protein_file = input_pdb
                # Write PDB lines followed by created water lines
                # NOTE(review): pdb_lines was read from self.input_pdbs[0] only,
                # so every input file is overwritten with the first file's
                # structure lines -- confirm this is intended.
                with open(input_pdb, "w+") as file:
                    for line in pdb_lines:
                        file.write(line)
                    file.write("\n")
                    for line in water_output:
                        file.write(line)
                    file.write("END")

                # Load the input PDB file again with Biopython to check for contacts
                parser = PDBParser()
                structure = parser.get_structure("complex", new_protein_file)
                water_list = []

                # Get all atoms of the structure to check for clashes
                # (this includes the added waters; they are filtered out of
                # the contact lists below)
                protein_list = Selection.unfold_entities(structure, "A")

                # Get all relevant water atoms to check for clashes: HOH
                # residues whose number was not present in the original file
                for res in structure.get_residues():
                    resnum = res._id[1]
                    if res.resname == "HOH":
                        if resnum not in resnums:
                            water_list = water_list + Selection.unfold_entities(
                                res, "A")

                # Check contacts between added waters and the protein at 5.0 angstrom
                # (the loop variable reuses the name water_output; the water
                # lines were already written above, so nothing is lost)
                contacts5 = []
                for water_output in water_list:
                    contacts5 = contacts5 + NeighborSearch(
                        protein_list).search(water_output.coord, 5.0, "A")
                contacts5 = [c for c in contacts5
                             if c not in water_list]  # exclude "self" contacts

                # Keep on translating the water molecules as long as there are clashes at 5.0 A
                # Every pass shifts all added water atoms by -5 along x, so
                # the waters move together as a rigid group.
                while contacts5:
                    contacts5 = []
                    for w_ in water_list:
                        x, y, z = w_.coord
                        # Set new coordinates and check contacts again
                        w_.set_coord([x - 5, y, z])
                        contacts5 = contacts5 + NeighborSearch(
                            protein_list).search(w_.coord, 5.0, "A")
                        contacts5 = [
                            c for c in contacts5 if c not in water_list
                        ]

                # Save final output with translated water as a temporary file
                temp_protein_file = os.path.join(
                    os.path.dirname(input_pdb),
                    os.path.basename(input_pdb).replace(".pdb", "_temp.pdb"),
                )

                io = PDBIO()
                io.set_structure(structure)
                io.save(temp_protein_file)
                output.append(new_protein_file)

                # Open the temporary file created with biopython
                new_water_lines = []
                with open(temp_protein_file, "r") as temp:
                    temp_lines = temp.readlines()

                    # Iterate over lines created with biopython
                    for line in temp_lines:
                        if (line[17:20].strip() == "HOH"
                                and int(line[22:27].strip()) not in resnums):
                            # Shift the atom number (columns 7-11) by lig_length.
                            # NOTE(review): str.replace substitutes every
                            # occurrence of that substring in the line, not
                            # only columns 7-11, and the new number is not
                            # padded to the original width -- verify.
                            line = line.replace(
                                line[7:11], str(int(line[7:11]) + lig_length))
                            # Close each water with a TER record after its 2HW atom.
                            if line[12:15] == "2HW":
                                line = line.strip("\n") + "\nTER\n"
                            # If it's one of the added waters, keep the adjusted line
                            new_water_lines.append(line)

                # NOTE(review): raises IndexError if no line matched above.
                del new_water_lines[
                    -1]  # Last biopython line is a not needed TER

                # Save new water lines, so they are not duplicated in the next run
                with open("added_waters.txt", "a+") as water_file:
                    for line in new_water_lines:
                        water_file.write(line)

                # Overwrite the original input PDB, save original PDB lines, added water lines and the original CONECTs.
                with open(new_protein_file, "w+") as file:
                    for line in pdb_lines:
                        file.write(line)
                    # `line` is whatever the loop above left behind, i.e. the
                    # last entry of pdb_lines (or a stale value if pdb_lines
                    # is empty).
                    if not line.startswith("TER"):
                        file.write("TER\n")
                    for line in new_water_lines:
                        file.write(line)
                    for line in conect:
                        file.write(line)
                    file.write("\n")
                    file.write("END")

                # Remove temporary biopython file
                os.remove(temp_protein_file)