Пример #1
0
    def get_rmsd_to(self, other_rnamodel, output='', dont_move=False):
        """Calculate the RMSD between this model's atoms and another model's atoms.

        ``self.atoms`` and ``other_rnamodel.atoms`` are paired element-wise
        (presumably P atoms, per the original docstring -- confirm at the
        place where ``atoms`` is populated).

        :param other_rnamodel: the model to compare with.
        :param output: if truthy, both structures are transformed and written
            to ``aligned.pdb`` and ``aligned2.pdb`` in the working directory.
            The value itself is not used as a file name.
        :param dont_move: if True, return the RMSD of the coordinates as they
            are (``SVDSuperimposer.get_init_rms``) without superimposing.
        :return: the RMSD; rounded to 3 decimals on the superimposing path.
        :raises Exception: whatever ``Superimposer.set_atoms`` raised, after
            diagnostics about both atom lists have been printed.
        """
        if dont_move:
            # get_vector() yields a Bio.PDB Vector; get_array() turns it into numbers
            # http://biopython.org/DIST/docs/api/Bio.PDB.Vector%27.Vector-class.html
            coords = array([a.get_vector().get_array() for a in self.atoms])
            other_coords = array([a.get_vector().get_array() for a in other_rnamodel.atoms])
            s = SVDSuperimposer()
            s.set(coords, other_coords)
            return s.get_init_rms()

        # Only needed on the superimposing path, so build it after the early return.
        sup = Bio.PDB.Superimposer()
        try:
            sup.set_atoms(self.atoms, other_rnamodel.atoms)
        except Exception:
            # Show what could not be paired, then propagate the real error.
            # Previously execution continued to round(sup.rms, 3) with no
            # alignment computed, hiding the actual cause.
            print(self.fn, len(self.atoms),  other_rnamodel.fn, len(other_rnamodel.atoms))
            for a, b in zip(self.atoms, other_rnamodel.atoms):
                print(a.parent, b.parent)
            raise

        rms = round(sup.rms, 3)

        if output:
            # NOTE(review): the same transformation is applied to both
            # structures before saving -- confirm this is intended.
            io = Bio.PDB.PDBIO()
            sup.apply(self.struc.get_atoms())
            io.set_structure(self.struc)
            io.save("aligned.pdb")

            io = Bio.PDB.PDBIO()
            sup.apply(other_rnamodel.struc.get_atoms())
            io.set_structure(other_rnamodel.struc)
            io.save("aligned2.pdb")
        return rms
Пример #2
0
    def setUp(self):
        """Create two 4x3 coordinate arrays and load them into a new SVDSuperimposer."""
        first = array([[51.65, -1.90, 50.07],
                       [50.40, -1.23, 50.65],
                       [50.68, -0.04, 51.54],
                       [50.22, -0.02, 52.85]])
        second = array([[51.30, -2.99, 46.54],
                        [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03],
                        [52.71, -1.18, 49.38]])

        superimposer = SVDSuperimposer()
        superimposer.set(first, second)

        # Expose the fixtures under the names the tests use.
        self.x = first
        self.y = second
        self.sup = superimposer
Пример #3
0
    def __sub__(self, other):
        """
        Superimpose the ``coords_ca`` of two fragments and report the RMSD.

        Example:
            >>> rmsd=fragment1-fragment2

        @return: rmsd between fragments
        @rtype: float
        """
        superimposer = SVDSuperimposer()
        superimposer.set(self.coords_ca, other.coords_ca)
        superimposer.run()
        rmsd = superimposer.get_rms()
        return rmsd
Пример #4
0
Файл: gc.py Проект: biocryst/gc
def distance_matrix(CA):
    """Return the symmetric matrix of pairwise RMSDs between models.

    ``CA`` is indexed as ``CA[model, :, :]``. Every unordered pair of models
    is superimposed with SVDSuperimposer and the resulting RMSD is stored at
    both ``[i, j]`` and ``[j, i]``; the diagonal stays zero.
    """
    count = CA.shape[0]
    result = np.zeros((count, count))
    superimposer = SVDSuperimposer()

    for row in range(count):
        reference = CA[row, :, :]
        # Only the upper triangle is computed; the value is mirrored.
        for col in range(row + 1, count):
            superimposer.set(reference, CA[col, :, :])
            superimposer.run()
            result[row, col] = result[col, row] = superimposer.get_rms()

    return result
Пример #5
0
def computeRMSD():
    """Superimpose the module-level ``ca_atoms`` onto ``ca_atoms_pdb`` and return the RMSD.

    Both globals are read as sequences whose items expose x, y, z at
    indices 0, 1 and 2. If their lengths differ, a message is printed and
    the interpreter exits via ``exit()``.

    Returns:
        The RMSD reported by SVDSuperimposer after ``run()``.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # Single-argument print: same output on Python 2 and Python 3.
        print("Error. Length mismatch! %d %d" % (len(ca_atoms), len(ca_atoms_pdb)))
        exit()
    n_atoms = len(ca_atoms)

    # reshape keeps the (n, 3) float layout even when n is 0
    fixed_coord = numpy.array(
        [[a[0], a[1], a[2]] for a in ca_atoms_pdb], dtype=float).reshape(n_atoms, 3)
    moving_coord = numpy.array(
        [[a[0], a[1], a[2]] for a in ca_atoms], dtype=float).reshape(n_atoms, 3)

    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
Пример #6
0
    def set_atoms(self, fixed, moving):
        """Superimpose two equally long atom lists and store the result.

        The coordinates of both lists are handed to SVDSuperimposer; the
        resulting RMSD is stored in ``self.rms`` and the rotation/translation
        pair in ``self.rotran``.

        @param fixed: list of (fixed) atoms
        @param moving: list of (moving) atoms
        @type fixed,moving: [L{Atom}, L{Atom},...]
        @raise PDBException: if the two lists differ in length
        """
        if len(fixed) != len(moving):
            raise PDBException("Fixed and moving atom lists differ in size")

        atom_count = len(fixed)
        fixed_coord = numpy.zeros((atom_count, 3))
        moving_coord = numpy.zeros((atom_count, 3))
        for row, (fixed_atom, moving_atom) in enumerate(zip(fixed, moving)):
            fixed_coord[row] = fixed_atom.get_coord()
            moving_coord[row] = moving_atom.get_coord()

        superimposer = SVDSuperimposer()
        superimposer.set(fixed_coord, moving_coord)
        superimposer.run()
        self.rms = superimposer.get_rms()
        self.rotran = superimposer.get_rotran()
Пример #7
0
def compute_frag_RMSD(res_len):
    """Superimpose the module-level ``ca_atoms`` onto ``ca_atoms_pdb`` for one fragment.

    Both globals are read as sequences whose items expose x, y, z at
    indices 0, 1 and 2.

    Args:
        res_len: expected number of atoms in the fragment.

    Returns:
        The RMSD reported by SVDSuperimposer after ``run()``, or 0 (after
        printing a message) when the two atom lists differ in length or
        their length differs from ``res_len``.
    """
    # Indentation is spaces only: the original mixed tabs and spaces, which
    # Python 3 rejects. Prints take one argument so the output is identical
    # on Python 2 and Python 3.
    if len(ca_atoms) != len(ca_atoms_pdb):
        print("Error. Length mismatch! target:frag %d %d"
              % (len(ca_atoms_pdb), len(ca_atoms)))
        return 0
    n_atoms = len(ca_atoms)
    if n_atoms != res_len:
        print("atom list length mismatches the fragment length! %s %s"
              % (str(n_atoms), str(res_len)))
        return 0

    # reshape keeps the (n, 3) float layout even when n is 0
    fixed_coord = numpy.array(
        [[a[0], a[1], a[2]] for a in ca_atoms_pdb], dtype=float).reshape(n_atoms, 3)
    moving_coord = numpy.array(
        [[a[0], a[1], a[2]] for a in ca_atoms], dtype=float).reshape(n_atoms, 3)

    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
Пример #8
0
def assemble_multiscale_visualization(topology_fn, rmf_fn, pdb_dir,
                                      outprefix=None, chimerax=True,
                                      xl_fn=None):
    """
    Render multiscale versions of rigid bodies from PDB files + flexible
    beads from RMF files, optionally with mapped crosslinks.

    The rigid-body PDB structures are aligned onto the RMF particle
    coordinates (frame 0), merged into one composite PDB file, and, if
    requested, a ChimeraX script (and a pseudobond file for crosslinks) is
    written next to it.

    Args:
    topology_fn (str): Topology file in pipe-separated-value (PSV) format
    as required in integrative modeling using IMP. For details on how
    to write a topology file, see:
    https://integrativemodeling.org/2.13.0/doc/ref/classIMP_1_1pmi_1_1topology_1_1TopologyReader.html

    rmf_fn (str): Name of the RMF file.

    pdb_dir (str): Directory containing all the PDB files for the rigid
    bodies used in modeling.

    outprefix (str, optional): Prefix for output files. Defaults to None,
    in which case "centroid_model" is used.

    chimerax (bool, optional): If true, a Chimerax script will be written
    (extension ".cxc"). Defaults to True. If false, the function returns
    right after the composite PDB file has been written.

    xl_fn (str, optional): A file containing a XL dataset. Defaults to None.
    If this dataset is supplied, then it will be mapped on to the overall
    structure with satisfied XLs drawn in blue and violated XLs drawn in red.
    A XL dataset should be supplied in a comma-separated-value (CSV) format
    containing at least the following fields

    protein1, residue1, protein2, residue2, sat

    where the last field, sat, is a boolean 1 or 0 depending on whether
    the particular XL is satisfied (in the ensemble sense) as a result of the
    integrative modeling exercise.

    Returns:
    None. Output is written to files named after ``outprefix``.
    """

    # -------------------------------------------
    # read the RMF file and extract all particles
    # -------------------------------------------
    rmf_fh = RMF.open_rmf_file_read_only(rmf_fn)
    rmf_model = IMP.Model()
    hier = IMP.rmf.create_hierarchies(rmf_fh, rmf_model)[0]
    IMP.rmf.load_frame(rmf_fh, 0)
    # (molecule name, particle name) -> coordinates
    rmf_ps = {}
    for p in IMP.core.get_leaves(hier):
        # the molecule name sits three levels above the leaf particle
        molname = p.get_parent().get_parent().get_parent().get_name().strip()
        name = p.get_name().strip()
        rmf_ps[(molname, name)] = IMP.core.XYZ(p).get_coordinates()

    # --------------------------------------------------------------
    # map pdb residues to rmf particles for each rigid body pdb file
    # --------------------------------------------------------------
    # read the topology file
    t = TopologyReader(topology_fn, pdb_dir=pdb_dir)
    components = t.get_components()

    map_pdb2rmf = {}
    rigid_body_models = {}
    rigid_body_residues = {}
    chain_ids = {}  # these are matched to the chimerax rmf plugin
    chain_id_count = 0
    for c in components:
        # ignore unstructured residues
        if c.pdb_file == "BEADS":
            continue
        mol = c.molname
        pdb_prefix = os.path.basename(c.pdb_file).split(".pdb")[0]
        chain_id = c.chain
        resrange = c.residue_range
        offset = c.pdb_offset

        # half-open residue range covered by this component
        r0 = resrange[0] + offset
        r1 = resrange[1] + 1 + offset

        if mol not in chain_ids:
            # one upper-case letter per molecule, in order of first appearance
            chain_ids[mol] = string.ascii_uppercase[chain_id_count]
            chain_id_count += 1

        if pdb_prefix not in map_pdb2rmf:
            # parse each PDB file only once and index its residues
            map_pdb2rmf[pdb_prefix] = {}
            this_rigid_body_model = PDBParser().get_structure("x", c.pdb_file)[0]
            this_rigid_body_residues = {
                (r.full_id[2], r.id[1]): r
                for r in this_rigid_body_model.get_residues()
            }
            rigid_body_models[pdb_prefix] = this_rigid_body_model
            rigid_body_residues[pdb_prefix] = this_rigid_body_residues

        for r in range(r0, r1):
            key = (chain_id, r)
            if key in rigid_body_residues[pdb_prefix]:
                map_pdb2rmf[pdb_prefix][key] = (mol, r)

    # --------------------------------
    # align all pdb files with the rmf
    # --------------------------------
    print("\nAligning all rigid body structures...")
    align = SVDSuperimposer()
    for pdb_prefix, mapper in map_pdb2rmf.items():
        pdb_coords = []
        pdb_atoms = []
        rmf_coords = []

        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            # CA position is paired with the RMF particle named by residue number
            pdb_coords.append(r["CA"].coord)
            pdb_atoms.extend(r.get_atoms())
            rmf_coords.append(rmf_ps[(mol, str(rmf_res))])

        pdb_coords = np.array(pdb_coords)
        rmf_coords = np.array(rmf_coords)
        align.set(rmf_coords, pdb_coords)
        align.run()
        rotmat, vec = align.get_rotran()
        # move every atom of the mapped residues, not just the CA atoms
        for a in pdb_atoms:
            a.transform(rotmat, vec)

    # --------------------------
    # assemble the composite pdb
    # --------------------------
    mols = {c.molname for c in components}
    print("\nChain IDs by molecule:")
    for k, v in chain_ids.items():
        print("molecule %s, chain ID %s" % (k, v))

    reslists = {mol: [] for mol in mols}
    for pdb_prefix, mapper in map_pdb2rmf.items():
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            # renumber the residue to the RMF residue number
            new_id = (r.id[0], rmf_res, r.id[2])
            new_residue = Residue.Residue(id=new_id, resname=r.resname,
                                          segid=r.segid)
            for a in r.get_atoms():
                new_residue.add(a)
            reslists[mol].append(new_residue)

    composite_model = Model.Model(0)
    for mol, chain_id in chain_ids.items():
        this_chain = Chain.Chain(chain_id)
        for r in sorted(reslists[mol], key=lambda res: res.id[1]):
            this_chain.add(r)
        composite_model.add(this_chain)

    # save the composite pdb to file
    io = PDBIO()
    io.set_structure(composite_model)
    if outprefix is None:
        outprefix = "centroid_model"
    io.save(outprefix + ".pdb")

    # -------------------------------------------------------------------
    # chimerax rendering (hide most of the rmf except unstructured beads)
    # -------------------------------------------------------------------
    if not chimerax:
        # Return instead of exit(): a library function must not terminate
        # the caller's interpreter.
        return
    print("\nWriting UCSF Chimerax script...")
    script = [
        "open %s\n" % (outprefix + ".pdb"),
        "open %s\n" % rmf_fn,
        "hide\n",
        "show cartoon\n",
        "color #%d %s\n" % (CHIMERAX_PDB_MODEL_NUM, STRUCT_COLOR),
        "color #%d %s\n" % (CHIMERAX_RMF_MODEL_NUM, UNSTRUCT_COLOR),
        "hide #%d\n" % CHIMERAX_RMF_MODEL_NUM,
    ]

    # all (molecule, residue) pairs that are covered by a rigid body
    struct_residues = set()
    for mapper in map_pdb2rmf.values():
        struct_residues.update(mapper.values())

    unstruct_atomspec = {}
    for molname, particle_name in rmf_ps:
        rmf_chain_id = chain_ids[molname]
        if "bead" in particle_name:
            # bead names start with "first-last", followed by "_..."
            first, last = particle_name.split("_")[0].split("-")
            first = int(first)
            last = int(last)
            this_atomspec = "#%d/%s:%d-%d" % \
                            (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, first, last)
            for r in range(first, last + 1):
                unstruct_atomspec[(molname, r)] = this_atomspec
        else:
            r = int(particle_name)
            if (molname, r) not in struct_residues:
                unstruct_atomspec[(molname, r)] = "#%d/%s:%d" % \
                    (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r)

    # sorted only to make the generated script reproducible
    script.append(
        "show %s\n" % (" ".join(sorted(set(unstruct_atomspec.values())))))

    # ----------------------------------------------------------
    # if crosslink data is supplied, write out a pseudobond file
    # ----------------------------------------------------------
    if xl_fn is not None:
        # parse XL data
        df = pd.read_csv(os.path.abspath(xl_fn))
        xls = []
        for _, row in df.iterrows():
            xls.append((row["protein1"], row["residue1"],
                        row["protein2"], row["residue2"], row["sat"]))

        # get lists of struct atomspecs
        atomspec = {}
        for (mol, particle_name) in rmf_ps:
            if "bead" in particle_name:
                continue
            resid = int(particle_name)
            if (mol, resid) in unstruct_atomspec:
                continue
            atomspec[(mol, resid)] = "#%d/%s:%d@CA" % \
                                     (CHIMERAX_PDB_MODEL_NUM, chain_ids[mol], resid)

        # now add in all the unstruct atomspecs
        atomspec.update(unstruct_atomspec)

        # write pseudobond script
        pb_lines = ["; radius = %2.2f\n" % XL_RADIUS, "; dashes = 0\n"]
        for p1, r1, p2, r2, sat in xls:
            atomspec_1 = atomspec[(p1, r1)]
            atomspec_2 = atomspec[(p2, r2)]
            # both ends on the same bead / residue: nothing to draw
            if atomspec_1 == atomspec_2:
                continue
            color = SAT_XL_COLOR if sat else VIOL_XL_COLOR
            pb_lines.append("%s %s %s\n" % (atomspec_1, atomspec_2, color))
        pb_lines.append("\n")
        pb_fn = outprefix + "_XLs.pb"
        with open(pb_fn, "w") as pb_file:
            pb_file.write("".join(pb_lines))
        script.append("open %s\n" % pb_fn)

    script.append("preset 'overall look' publication\n")
    chimerax_out_fn = outprefix + ".cxc"
    with open(chimerax_out_fn, "w") as cxc_file:
        cxc_file.write("".join(script))
def compute_deviations(reader,
                       mean_structure,
                       indexed_mean_structure,
                       num_confs,
                       start=None,
                       stop=None):
    """
        Align every configuration to the mean structure and collect per-particle deviations.

        Each frame is superimposed onto the mean using only the particles
        whose ``index`` is in the module-level ``indexes`` collection; the
        resulting transform is then applied to all particles.

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): Position of each particle in the mean configuration.
                Must broadcast against the per-frame array of ``cm_pos`` values (one row per particle).
            indexed_mean_structure (numpy.array): The rows of the mean configuration used for the fit;
                passed to the superimposer together with the indexed positions of each frame.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): Number of configurations the reader skips before the first frame.
            <optional> stop (int): Upper bound on the frame counter, which starts at 0. Defaults to num_confs.

        Returns:
            deviations (list): One list per frame holding the distance of each particle from its mean position.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    def all_positions(conf):
        # positions of every nucleotide in the configuration
        return np.array([n.cm_pos for n in conf._nucleotides])

    def indexed_positions(conf):
        # positions of the nucleotides selected by the global `indexes`
        return np.array(
            [n.cm_pos for n in conf._nucleotides if n.index in indexes])

    # Use the single-value decomposition method for superimposing configurations
    superimposer = SVDSuperimposer()
    deviations = []
    frame = 0

    system = reader._get_system(N_skip=start)
    while system != False and frame < stop:
        system.inbox()
        positions = all_positions(system)
        subset = indexed_positions(system)

        # fit on the subset only
        superimposer.set(indexed_mean_structure, subset)
        superimposer.run()
        print("Frame number:", frame, "RMSF:", superimposer.get_rms())

        # apply the fitted transform to the whole frame
        rot, tran = superimposer.get_rotran()
        aligned = np.einsum('ij, ki -> kj', rot, positions) + tran

        # plain lists so the result is compatible with json
        per_particle = np.linalg.norm(aligned - mean_structure, axis=1)
        deviations.append(list(per_particle))

        frame += 1
        system = reader._get_system()

    return deviations
Пример #10
0
def main():
    """Superimpose one or more configurations onto a reference configuration.

    Command line: ``reference victims... [-i index_file]``. Each victim is
    fitted to the reference (optionally using only the particle indexes
    listed in the index file) and written to ``aligned{N}.dat``, where N is
    the position of the victim on the command line.

    Raises:
        SystemExit: with status 1 if the index file does not contain
            integers only.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "superimposes one or more structures sharing a topology to a reference structure"
    )
    parser.add_argument('reference',
                        type=str,
                        nargs=1,
                        help="The reference configuration to superimpose to")
    parser.add_argument(
        'victims',
        type=str,
        nargs='+',
        help="The configuraitons to superimpose on the reference")
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Get the reference files
    ref_dat = args.reference[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    indexes = None
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            tokens = f.readline().split()
        try:
            indexes = [int(i) for i in tokens]
        except ValueError:
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button"
            )
            # Stop here: continuing with non-integer indexes would only fail
            # later with an unrelated indexing error.
            raise SystemExit(1)

    #Read the reference once; without -i every particle is used
    with ErikReader(ref_dat) as reader:
        ref = reader.read()
    if indexes is None:
        indexes = list(range(len(ref.positions)))
    ref.inbox()
    ref_conf = ref.positions[indexes]

    #Create list of configurations to superimpose
    to_sup = []
    for victim in args.victims:
        with ErikReader(victim) as reader:
            system = reader.read()
        system.inbox()
        to_sup.append(system)

    sup = SVDSuperimposer()

    #Run the biopython superimposer on each configuration and rewrite its configuration file
    for i, system in enumerate(to_sup):
        indexed_cur_conf = system.positions[indexes]
        sup.set(ref_conf, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()
        #positions are rotated and translated; orientation vectors are only rotated
        system.positions = np.einsum('ij, ki -> kj', rot, system.positions) + tran
        system.a1s = np.einsum('ij, ki -> kj', rot, system.a1s)
        system.a3s = np.einsum('ij, ki -> kj', rot, system.a3s)
        system.write_new("aligned{}.dat".format(i))
        print("INFO: Wrote file aligned{}.dat".format(i), file=stderr)
Пример #11
0
def calc_DockQ(model,native,use_CA_only=False):
    """Compute the DockQ score and its components for a two-chain model against its native.

    The external ``fnat`` executable (expected next to the running script)
    is called twice to obtain the contact statistics (cutoff argument 5)
    and the interface residue list (cutoff argument 10). Interface RMSD is
    computed by superimposing the interface atoms common to both
    structures; ligand RMSD is computed on the smaller chain after
    superimposing on the larger (receptor) chain.

    Args:
        model: path of the model PDB file.
        native: path of the native PDB file.
        use_CA_only: use only CA atoms instead of CA, C, N, O.

    Returns:
        dict with the keys DockQ, irms, Lrms, fnat, nat_correct, nat_total,
        fnonnat, nonnat_count, model_total, chain1, chain2, len1, len2,
        class1 and class2.

    Raises:
        AssertionError: if ``fnat`` fails or the atom selections of model
            and native are empty or of different size.
    """
    # commands exists only on Python 2; subprocess.getoutput is its
    # Python 3 counterpart.
    try:
        from commands import getoutput
    except ImportError:
        from subprocess import getoutput

    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA'] if use_CA_only else ['CA', 'C', 'N', 'O']

    # NOTE(review): model and native are interpolated into a shell command
    # unquoted; paths with spaces or shell metacharacters will break or be
    # interpreted by the shell.
    cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5'
    cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10'

    fnat_out = getoutput(cmd_fnat)
    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total, interface5A) = parse_fnat(fnat_out)
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)
    inter_out = getoutput(cmd_interface)
    (fnat_bb, nat_correct_bb, nat_total_bb, fnonnat_bb, nonnat_count_bb, model_total_bb, interface) = parse_fnat(inter_out)
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    # Start the parser
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)

    # Get the structures
    ref_structure = pdb_parser.get_structure("reference", native)
    sample_structure = pdb_parser.get_structure("model", model)

    # Use the first model in the pdb-files for alignment
    # Change the number 0 if you want to align to another structure
    ref_model = ref_structure[0]
    sample_model = sample_structure[0]

    # Atoms used for the interface superposition, in matching order
    ref_atoms = []
    sample_atoms = []

    # Residue keys have the form "<residue number><chain id>", atom keys
    # "<residue key>.<atom name>". The containers below are only used for
    # membership tests, hence sets.
    atoms_def_sample = set()
    atoms_def_in_both = set()
    common_interface = set()
    common_residues = set()

    # per chain: residue keys of the model, in file order
    chain_res = {}

    # find atoms common in both sample and native
    # first read in sample
    for sample_chain in sample_model:
        chain = sample_chain.id
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            key = str(sample_res.get_id()[1]) + chain
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in sample_res:
                    if atom_key in atoms_def_sample:
                        print(atom_key + ' already added (MODEL)!!!')
                    atoms_def_sample.add(atom_key)

    # then read in native also present in sample
    for ref_chain in ref_model:
        chain = ref_chain.id
        for ref_res in ref_chain:
            if ref_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            key = str(ref_res.get_id()[1]) + chain
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print(atom_key + ' already added (Native)!!!')
                    atoms_def_in_both.add(atom_key)

    # collect the model's interface atoms and its residue keys per chain
    for sample_chain in sample_model:
        chain = sample_chain.id
        if chain not in chain_res:
            chain_res[chain] = []
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            key = str(sample_res.get_id()[1]) + chain
            chain_res[chain].append(key)
            if key in interface:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])
                common_interface.add(key)

    # per chain: native atoms that also exist in the model
    chain_ref = {}

    # Iterate over all chains of the native to find the matching atoms
    for ref_chain in ref_model:
        chain = ref_chain.id
        if chain not in chain_ref:
            chain_ref[chain] = []
        for ref_res in ref_chain:
            if ref_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            key = str(ref_res.get_id()[1]) + chain

            if key in chain_res[chain]:  # if key is present in sample
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in ref_res and atom_key in atoms_def_in_both:
                        chain_ref[chain].append(ref_res[a])
                        common_residues.add(key)
            if key in common_interface:
                # interface residue of the model: add its shared atoms
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in ref_res and atom_key in atoms_def_in_both:
                        ref_atoms.append(ref_res[a])

    # get the ones that are present in native
    chain_sample = {}
    for sample_chain in sample_model:
        chain = sample_chain.id
        if chain not in chain_sample:
            chain_sample[chain] = []
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            key = str(sample_res.get_id()[1]) + chain
            if key in common_residues:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        chain_sample[chain].append(sample_res[a])

    assert len(ref_atoms) != 0, "length of native is zero"
    assert len(sample_atoms) != 0, "length of model is zero"
    assert len(ref_atoms) == len(sample_atoms), "Different number of atoms in native and model %d %d\n" % (len(ref_atoms), len(sample_atoms))

    # interface RMSD: superimpose on the shared interface atoms
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())
    irms = super_imposer.rms

    # exactly two chains are expected in the model
    (chain1, chain2) = chain_sample.keys()

    ligand_chain = chain1
    receptor_chain = chain2
    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])

    assert len1 != 0, "%s chain has zero length!\n" % chain1
    assert len2 != 0, "%s chain has zero length!\n" % chain2

    # the chain with more matched atoms is treated as the receptor
    class1 = 'ligand'
    class2 = 'receptor'
    if len(chain_sample[chain1]) > len(chain_sample[chain2]):
        receptor_chain = chain1
        ligand_chain = chain2
        class1 = 'receptor'
        class2 = 'ligand'

    # Set to align on receptor
    assert len(chain_ref[receptor_chain]) == len(chain_sample[receptor_chain]), "Different number of atoms in native and model receptor (chain %s) %d %d\n" % (receptor_chain, len(chain_ref[receptor_chain]), len(chain_sample[receptor_chain]))

    super_imposer.set_atoms(chain_ref[receptor_chain], chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())
    receptor_chain_rms = super_imposer.rms

    assert len(chain_ref[ligand_chain]) != 0 or len(chain_sample[ligand_chain]) != 0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (ligand_chain, len(chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    assert len(chain_ref[ligand_chain]) == len(chain_sample[ligand_chain]), "Different number of atoms in native and model ligand (chain %s) %d %d\n" % (ligand_chain, len(chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    # ligand coordinates after the receptor-based superposition
    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])

    sup = SVDSuperimposer()
    Lrms = sup._rms(coord1, coord2)  # using the private _rms function which does not superimpose

    DockQ = (float(fnat) + 1/(1+(irms/1.5)*(irms/1.5)) + 1/(1+(Lrms/8.5)*(Lrms/8.5)))/3

    # named `info` rather than `dict` so the builtin is not shadowed
    info = {}
    info['DockQ'] = DockQ
    info['irms'] = irms
    info['Lrms'] = Lrms
    info['fnat'] = fnat
    info['nat_correct'] = nat_correct
    info['nat_total'] = nat_total

    info['fnonnat'] = fnonnat
    info['nonnat_count'] = nonnat_count
    info['model_total'] = model_total

    info['chain1'] = chain1
    info['chain2'] = chain2
    info['len1'] = len1
    info['len2'] = len2
    info['class1'] = class1
    info['class2'] = class2

    return info
Пример #12
0
Файл: gc.py Проект: biocryst/gc
def align_models(CA):
    """Iteratively superimpose all models onto their running mean.

    A copy of ``CA`` (indexed as ``CA[model, :, :]``) is first fitted onto
    its first model, then repeatedly onto the mean of the fitted models
    until the summed squared RMSD improves by no more than 0.001.

    Returns a tuple ``(transformations, rms)``: the ``get_rotran()`` result
    relating every input model to its fitted counterpart, and the square
    root of the mean squared RMSD of the last iteration.
    """
    model_count = CA.shape[0]
    fitted = np.copy(CA)
    superimposer = SVDSuperimposer()

    def fit_onto(target, first_index):
        # Fit models first_index.. onto target in place; return sum of rms^2.
        squared_sum = 0
        for index in range(first_index, model_count):
            superimposer.set(target, fitted[index])
            superimposer.run()
            squared_sum += superimposer.get_rms()**2
            fitted[index] = superimposer.get_transformed()
        return squared_sum

    # initial pass: everything onto the first model
    rms_total = fit_onto(fitted[0, :, :], 1)

    # refinement: onto the mean, until the improvement drops below tolerance
    tolerance = 0.001
    previous_total = float("inf")
    while previous_total - rms_total > tolerance:
        previous_total = rms_total
        rms_total = fit_onto(np.mean(fitted, 0), 0)

    # recover one transformation per model from original to fitted coordinates
    transformations = []
    for original, result in zip(CA, fitted):
        superimposer.set(result, original)
        superimposer.run()
        transformations.append(superimposer.get_rotran())

    return transformations, np.sqrt(rms_total/model_count)
Пример #13
0
class SVDSuperimposerTest(unittest.TestCase):
    """Check SVDSuperimposer against a small, precomputed superposition."""

    def setUp(self):
        """Load reference set ``x`` and moving set ``y`` into a fresh superimposer."""
        self.x = array([[51.65, -1.90, 50.07],
                        [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54],
                        [50.22, -0.02, 52.85]])

        self.y = array([[51.30, -2.99, 46.54],
                        [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03],
                        [52.71, -1.18, 49.38]])

        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def _assert_same_to_3_decimals(self, actual, expected):
        """Assert that two arrays are equal after rounding to 3 decimals."""
        self.assertTrue(
            array_equal(around(actual, decimals=3), around(expected, decimals=3)))

    def test_get_init_rms(self):
        """The RMS before superposition is computed lazily by get_init_rms()."""
        x = array([[1.19, 1.28, 1.37],
                   [1.46, 1.55, 1.64],
                   [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73],
                   [1.64, 1.55, 1.46],
                   [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # assertTrue(a, b) would use b as the failure message and pass for any
        # truthy a; assertEqual really compares the two rounded values.
        self.assertEqual(
            float('%.3f' % self.sup.get_init_rms()), float('%.3f' % init_rms))

    def test_oldTest(self):
        """Walk through set -> run -> get_rms -> get_rotran -> get_transformed."""
        # After set(): inputs are stored, every result is still unset.
        self._assert_same_to_3_decimals(self.sup.reference_coords, self.x)
        self._assert_same_to_3_decimals(self.sup.coords, self.y)
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        # After run(): inputs are unchanged, rotation/translation are filled
        # in, and both RMS values are still unset.
        self.sup.run()
        self._assert_same_to_3_decimals(self.sup.reference_coords, self.x)
        self._assert_same_to_3_decimals(self.sup.coords, self.y)
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self._assert_same_to_3_decimals(self.sup.rot, rot)
        self._assert_same_to_3_decimals(self.sup.tran, tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertEqual(
            float('%.3f' % self.sup.get_rms()), float('%.3f' % rms))

        rot_get, tran_get = self.sup.get_rotran()
        self._assert_same_to_3_decimals(rot_get, rot)
        self._assert_same_to_3_decimals(tran_get, tran)

        # The rotation is applied by right-multiplication: y . rot + tran.
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self._assert_same_to_3_decimals(y_on_x1, y_x_solution)

        y_on_x2 = self.sup.get_transformed()
        self._assert_same_to_3_decimals(y_on_x2, y_x_solution)
Пример #14
0
def doublets_dist(d1, d2):
    """Return the smaller superposition RMS between two doublets.

    ``d1['vec']`` is used as the reference and is fitted against both
    ``d2['vec']`` and ``d2['vec2']``; the lower of the two RMS values is
    returned.
    """
    fitter = SVDSuperimposer()
    rms_values = []
    for candidate_key in ('vec', 'vec2'):
        fitter.set(d1['vec'], d2[candidate_key])
        fitter.run()
        rms_values.append(fitter.get_rms())
    return min(rms_values)
def main():
    """Align every frame of a trajectory and write the result to a new file.

    Command-line arguments:
        traj     trajectory file to align
        outfile  trajectory file to write
        -i       optional index file; its first line is a space-separated
                 list of particle IDs, and only those are used for the fit
        -r       optional reference configuration to align to instead of the
                 first frame of the trajectory

    Exits with status 1 if the index file does not contain integers.
    """
    #handle commandline arguments
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Aligns each frame in a trajectory to the first frame")
    parser.add_argument('traj',
                        type=str,
                        nargs=1,
                        help="The trajectory file to align")
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='The name of the new trajectory file to write out')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument(
        '-r',
        metavar='reference_structure',
        dest='reference_structure',
        nargs=1,
        help="Align to a provided configuration instead of the first frame.")
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Parse command line arguments
    traj_file = args.traj[0]
    outfile = args.outfile[0]
    sup = SVDSuperimposer()

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button"
            )
            # Continuing with string indexes would only fail later, inside
            # the positions lookup, with a far less helpful error.
            raise SystemExit(1)
    else:
        # No index file: use every particle of the first configuration.
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r will make it align to a provided .dat file instead of the first configuration
    if args.reference_structure:
        #read reference configuration; the reader is only needed for this one read
        with ErikReader(args.reference_structure[0]) as ref_reader:
            ref = ref_reader.read()
        ref.inbox()
        ref_conf = ref.positions[indexes]

        # The first trajectory frame is aligned like every later frame, so it
        # gets the same particle subset as the call in the loop below.
        r = ErikReader(traj_file)
        mysystem = align_frame(ref_conf, sup, r.read(), indexes)

    else:
        #read the first configuration and use it as the reference configuration for the rest
        r = ErikReader(traj_file)
        mysystem = r.read()
        mysystem.inbox()
        ref_conf = mysystem.positions[indexes]

    #write first configuration to output file
    mysystem.write_new(outfile)
    mysystem = r.read()

    #Read the trajectory one configuration at a time and perform the alignment
    # The loop stops once the reader's read() returns a value equal to False.
    while mysystem != False:
        print("working on t = ", mysystem.time)

        mysystem = align_frame(ref_conf, sup, mysystem, indexes)

        mysystem.write_append(outfile)

        mysystem = r.read()
Пример #16
0
from Bio.SVDSuperimposer import SVDSuperimposer

# Usage example for SVDSuperimposer: fit the point set y onto the point set x.
# Both sets are 4x3 arrays (four points, three coordinates each).
# NOTE(review): assumes `array` is numpy.array, where the 'f' typecode
# selects single-precision floats - confirm against the file's imports.

x = array([[51.65, -1.90, 50.07],
          [50.40, -1.23, 50.65],
          [50.68, -0.04, 51.54],
          [50.22, -0.02, 52.85]], 'f')

y = array([[51.30, -2.99, 46.54],
          [51.09, -1.88, 47.58],
          [52.36, -1.20, 48.03],
          [52.71, -1.18, 49.38]], 'f')

sup = SVDSuperimposer()

# set the coords: x is the reference,
# y will be rotated and translated on x
sup.set(x, y)

# do the least-squares fit
sup.run()

# get the RMSD after superposition
rms = sup.get_rms()

# get the rotation (applied by right-multiplication!) and the translation
rot, tran = sup.get_rotran()

# rotate y on x manually
Пример #17
0
class SVDSuperimposerTest(unittest.TestCase):
    """Regression tests for SVDSuperimposer on hand-checked coordinates."""

    def setUp(self):
        """Create a superimposer holding reference ``x`` and moving set ``y``."""
        self.x = array([[51.65, -1.90, 50.07], [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54], [50.22, -0.02, 52.85]])

        self.y = array([[51.30, -2.99, 46.54], [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03], [52.71, -1.18, 49.38]])

        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def test_get_init_rms(self):
        """get_init_rms() returns the RMS of the un-superimposed sets."""
        x = array([[1.19, 1.28, 1.37], [1.46, 1.55, 1.64], [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73], [1.64, 1.55, 1.46], [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # This used to be assertTrue(a, b), which takes b as the failure
        # message and therefore never compared the two numbers.
        self.assertEqual(float("%.3f" % self.sup.get_init_rms()),
                         float("%.3f" % init_rms))

    def test_oldTest(self):
        """Check stored state and results before and after run()."""
        # Before run(): inputs stored, no results yet.
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        # After run(): inputs untouched, rot/tran set, RMS values still unset.
        self.sup.run()
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self.assertTrue(
            array_equal(around(self.sup.rot, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.tran, decimals=3),
                        around(tran, decimals=3)))
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertEqual(float("%.3f" % self.sup.get_rms()),
                         float("%.3f" % rms))

        rot_get, tran_get = self.sup.get_rotran()
        self.assertTrue(
            array_equal(around(rot_get, decimals=3), around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(tran_get, decimals=3), around(tran,
                                                             decimals=3)))

        # Applying the transform by hand (right-multiplication) must agree
        # with get_transformed().
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self.assertTrue(
            array_equal(around(y_on_x1, decimals=3),
                        around(y_x_solution, decimals=3)))

        y_on_x2 = self.sup.get_transformed()
        self.assertTrue(
            array_equal(around(y_on_x2, decimals=3),
                        around(y_x_solution, decimals=3)))
Пример #18
0
# Script body: superimpose each "victim" configuration onto a reference
# configuration and write the result to aligned<i>.dat.
# `args` is parsed before this excerpt.

#Get the reference files
top_file = args.topology[0]
ref_dat = args.reference[0]

#Create list of configurations to superimpose
to_sup = []
r = LorenzoReader2(ref_dat, top_file)
ref = r._get_system()
# Only the reference is passed through inbox(); the victims below are not.
ref.inbox()
ref_conf = fetch_np(ref)
# NOTE(review): the loop variable `sys` shadows the name of the stdlib `sys`
# module for the rest of the script - confirm the module is not needed later.
for i in args.victims:
    r = LorenzoReader2(i, top_file)
    sys = r._get_system()
    to_sup.append(sys)

sup = SVDSuperimposer()

#Run the biopython superimposer on each configuration and rewrite its configuration file
for i, sys in enumerate(to_sup):
    cur_conf = fetch_np(sys)
    # ref_conf is the fixed reference; cur_conf is fitted onto it.
    sup.set(ref_conf, cur_conf)
    sup.run()
    rot, tran = sup.get_rotran()
    # 'ij, ki -> kj' sums over i: every row of cur_conf is right-multiplied
    # by rot, i.e. the same result as np.dot(cur_conf, rot).
    cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
    for j, n in enumerate(sys._nucleotides):
        n.cm_pos = cur_conf[j]
        # The orientation vectors get the rotation only (no translation) and
        # are then passed through normalize().
        n._a1 = normalize(np.dot(n._a1, rot))
        n._a3 = normalize(np.dot(n._a3, rot))
    # NOTE(review): the second argument is "/dev/null", presumably to discard
    # a secondary output file - confirm against print_lorenzo_output.
    sys.print_lorenzo_output("aligned{}.dat".format(i), "/dev/null")
    print("INFO: Wrote file aligned{}.dat".format(i), file=stderr)
Пример #19
0
def tm_movement_2D(pdbs1, pdbs2, mode, data, gn_dictionary):
    """Compare the placement of the seven TM helices between two structure sets.

    Outline of what the code below does:

    1. Load both structure sets into ``Distances`` objects and keep the
       conserved generic numbers (GNs) present in both.
    2. Depending on ``mode``, select per TM a group of reference GNs (``gns``)
       and a group of helper GNs (``middle_gpcr``).
    3. Build, per set, a matrix of pairwise distances averaged over the number
       of structures in the set.
    4. Rank the TMs by how little their distances differ between the sets.
    5. Rebuild 3D points from the distance matrices (``recreate3Dorder``) and
       superimpose set 2 on set 1 using the three top-ranked TMs; a copy of
       set 2 with negated z is also tried and kept if its RMSD is lower.
    6. Project the first seven centroids of both sets onto a 2D plane
       (``convert3D_to_2D_plane``) and compute one rotation value per TM from
       the angles stored in ``data['tab4']``.

    Progress and RMSD values are printed to stdout.

    Args:
        pdbs1: first structure set; handed to ``Distances.load_pdbs`` and its
            length is used as the averaging divisor.
        pdbs2: second structure set, used like ``pdbs1``.
        mode: index into ``["extracellular", "intracellular", "pocket",
            "middle"]`` (0-3) selecting how reference GNs are chosen.
        data: dict; ``data['gpcr_class']`` is read in mode 3 and
            ``data['tab4'][...]['angles_set1'/'angles_set2'][11]`` is read for
            the rotations.
        gn_dictionary: mapping applied to every selected GN (for the angle
            lookup and for the returned ``gns_used``); its inverse is used in
            mode 3 when the class is not "001".

    Returns:
        ``[]`` when a TM has too few conserved residues, otherwise a dict with
        the keys ``"coordinates_set1"``, ``"coordinates_set2"`` (seven dicts
        each with ``label``, ``x``, ``y``, ``z``, ``rotation``) and
        ``"gns_used"``.
    """
    string_mode = ["extracellular", "intracellular", "pocket", "middle"]
    intracellular = (mode == 1)
    print("COMPARISON", string_mode[mode])
    print(pdbs1)
    print("VS")
    print(pdbs2)

    distances_set1 = Distances()
    distances_set1.load_pdbs(pdbs1)
    distances_set1.filtered_gns = True

    distances_set2 = Distances()
    distances_set2.load_pdbs(pdbs2)
    distances_set2.filtered_gns = True

    conserved_set1 = distances_set1.fetch_conserved_gns_tm()
    conserved_set2 = distances_set2.fetch_conserved_gns_tm()
    # GNs conserved in both sets, in the order of set 2.
    conserved = [x for x in conserved_set2 if x in conserved_set1]

    # NOTE(review): [[]] * 7 makes all seven slots alias ONE empty list. This
    # works here because slots are only ever rebound (gns[i] = ...), never
    # mutated in place, before the final renumbering at the bottom.
    gns = [[]] * 7
    middle_gpcr = [[]] * 7
    if mode <= 1: # Intracellular or Extracellular
        for i in range(0,7):
            # GNs of TM i+1: the first character of a GN is the TM number.
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if intracellular and i % 2 == 0: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            elif not intracellular and i % 2 == 1: # all even TMs (as # i+1)
                tm_only.reverse()
            if len(tm_only) < 3:
                print("too few residues")
                return []
            gns[i] = tm_only[0:3]

            # Take the furthest 3-residue window that fits (positions 9-11
            # down to 4-6); `break` keeps the first, i.e. largest, match.
            for upwards in range(12, 6, -1):
                if len(tm_only) >= upwards:
                    middle_gpcr[i] = tm_only[(upwards-3):upwards]
                    break

        # INCLUDING References points from membrane middle of GPCR
        # ref_membrane_mid = {}
        # ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        # #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        # ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        # ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        # ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        # ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F
        #
        # middle_gpcr = ref_membrane_mid[data['gpcr_class']]
    elif mode == 2: # Major pocket (class A)
        ligand_references = [['1x39', '1x40','1x41'], ['2x56', '2x57','2x58'], ['3x31', '3x32', '3x33'], ['4x56', '4x57', '4x58'], ['5x43', '5x44', '5x45'], ['6x51', '6x52', '6x53'], ['7x39', '7x40', '7x41']]
        for i in range(0,7):
            gns[i] = [x for x in ligand_references[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: #all uneven TMs (as # = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                # NOTE(review): `continue` as the last statement of the loop
                # body is a no-op, so every fitting window overwrites the
                # previous one and the SMALLEST offset wins - unlike the
                # `break` used for mode <= 1. Confirm which is intended.
                for upwards in range(9, 6, -1):
                   if len(tm_only) >= (start_pos+upwards):
                       middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                       continue
            else:
                if len(tm_only) < 9:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[6:9]

                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

        # # FILTER not conserved GNs
        # middle_gpcr = [[]] * 7
        # for i in range(0,7):
        #     tm_only = [x for x in conserved if x[0]==str(i+1)]
        #     if i % 2 == 0: #all uneven TMs (as # = i+1)
        #         tm_only.reverse()
        #
        #     if len(tm_only) < 3:
        #         print("too few residues")
        #         return []
        #
        #     middle_gpcr[i] = tm_only[0:3]
        #print(middle_gpcr)

    elif mode == 3: # Middle
        # References points from membrane middle of GPCR
        ref_membrane_mid = {}
        ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['6x48', '6x49', '6x50'], ['7x47', '7x49']] # B1
        ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F

        # Raises KeyError for a class that has no entry above.
        membrane_mid = ref_membrane_mid[data['gpcr_class']]

        # For every class except "001" the reference GNs are translated
        # through the inverse of gn_dictionary (in place; the table is local).
        if data['gpcr_class'] != "001":
            inv_gn_dictionary = {v: k for k, v in gn_dictionary.items()}
            for index in range(len(membrane_mid)):
                membrane_mid[index] = [inv_gn_dictionary[res] for res in membrane_mid[index]]

        for i in range(0,7):
            gns[i] = [x for x in membrane_mid[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: #all uneven TMs (as # = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                # NOTE(review): same no-op `continue` as in the mode == 2
                # branch - the smallest fitting offset ends up being used.
                for upwards in range(6, 3, -1):
                   if len(tm_only) >= (start_pos+upwards):
                       middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                       continue
            else:
                if len(tm_only) < 6:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[3:6]

                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

    # Merge the reference and the helper points
    gns_flat = [y for x in gns for y in x]
    # Helper points must be conserved and must not duplicate a reference point.
    middle_gpcr = [list(filter(lambda x: x in conserved and x not in gns_flat, tm_list)) for tm_list in middle_gpcr]
    # print(gns)
    # print(middle_gpcr)

    # Groups 0-6 are the reference points per TM, groups 7-13 the helper points.
    ends_and_middle = gns[:]
    ends_and_middle.extend(middle_gpcr)
    ends_and_middle_flat = [y for x in ends_and_middle for y in x]
    # For every flattened GN, the index of the group it came from.
    ends_and_middle_grouping = [x for x in range(0, len(ends_and_middle)) for y in ends_and_middle[x]]
    # Zero-based TM index per group, read from the first GN of the group.
    # NOTE(review): raises IndexError if any group is empty, which can happen
    # for helper groups after the filter above - confirm this cannot occur.
    segment_order = [int(ends_and_middle[x][0][0])-1 for x in range(0, len(ends_and_middle))]

    distances_set1.filter_gns.extend([y for x in ends_and_middle for y in x])
    # Both sets share the very same filter list object from here on.
    distances_set2.filter_gns = distances_set1.filter_gns
    distances_set1.fetch_distances_tm(distance_type = "HC")
    distances_set2.fetch_distances_tm(distance_type = "HC")


    # Square matrices of pairwise distances; x[:] gives each row its own list.
    membrane_data1 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    membrane_data2 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    for i in range(0,len(ends_and_middle_flat)-1):
        for j in range(i+1, len(ends_and_middle_flat)):
            # Distance keys are "<gn>_<gn>" with the pair ordered by right_gn_order.
            if right_gn_order(ends_and_middle_flat[i], ends_and_middle_flat[j]):
                filter_key = ends_and_middle_flat[i] + "_" + ends_and_middle_flat[j]
            else:
                filter_key = ends_and_middle_flat[j] + "_" + ends_and_middle_flat[i]

            if ends_and_middle_flat[i] != ends_and_middle_flat[j]:
                # Sum of the stored distances divided by the number of structures.
                membrane_data1[i][j] = sum(distances_set1.data[filter_key])/len(pdbs1)
                membrane_data1[j][i] = membrane_data1[i][j]
                membrane_data2[i][j] = sum(distances_set2.data[filter_key])/len(pdbs2)
                membrane_data2[j][i] = membrane_data2[i][j]

    # Identify most stable TMs by ranking the variations to all other helices
    membrane_data1 = np.array([np.array(x) for x in membrane_data1])
    membrane_data2 = np.array([np.array(x) for x in membrane_data2])
    diff_distances = [x[:] for x in [[0] * len(ends_and_middle)] * len(ends_and_middle)]
    for i in range(0,max(ends_and_middle_grouping)):
        for j in range(i+1, max(ends_and_middle_grouping)+1):
            # Calculate movements for each TM relative to their "normal" distance
            # selected residues for group 1 and 2
            group_1 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
            group_2 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == j]

            # Summed absolute difference between the sets as a percentage of
            # the mean summed distance of the two sets.
            diff_distances[i][j] = np.sum(abs(membrane_data1[group_1][:, group_2] - membrane_data2[group_1][:, group_2]))/(np.sum(membrane_data1[group_1][:, group_2]+membrane_data2[group_1][:, group_2])/2)*100
            diff_distances[j][i] = diff_distances[i][j]

    # Ranking for each TM
    # NOTE(review): sum_differences is only referenced by commented-out code.
    sum_differences = [sum(x) for x in diff_distances]
    # normalized_differences = [((sum_differences[i]-min(sum_differences[0:7]))/(max(sum_differences[0:7])-min(sum_differences[0:7])))**2 for i in range(0,7)]
    # Replace each value in rows 0-6 by its position in the sorted row.
    for i in range(0,7):
        diff_distances[i] = [sorted(diff_distances[i]).index(x) for x in diff_distances[i]]
    final_rank = [sum([diff_distances[j][i] for j in range(0,7)]) for i in range(0,7)]

    # Grab stable TMs
    tm_ranking = [0] * 7
    sorted_rank = sorted(final_rank)
    for i in range(0,7):
        tm_ranking[i] = final_rank.index(sorted_rank[i])
        final_rank[tm_ranking[i]] = 100 # make sure this TM isn't repeated

    # Calculate 3D coordinates from distance matrix
    # NOTE(review): tms_set1 is only referenced by commented-out code below.
    tms_centroids_set1, tms_set1 = recreate3Dorder(membrane_data1, ends_and_middle_grouping)
    tms_centroids_set2, tms_set2 = recreate3Dorder(membrane_data2, ends_and_middle_grouping)

    # Align 3D points of set2 with 3D points of set1 using the most stable reference points
    best_rmsd = 1000
    best_set = []
    # Disabled the testing RMSD for now
    # NOTE(review): assuming itertools.combinations, taking 3 out of 3 yields
    # exactly one combination, so rot/trans/rmsd after the loop belong to it.
    # best_set is recorded but not used afterwards.
    for comb in combinations(tm_ranking[:3], 3):
    #for comb in combinations(tm_ranking[:4], 3):
        sel_refs = [x for x in range(0,len(segment_order)) if segment_order[x] in comb]
        #print(sel_refs)

        tms_reference_set1 = np.array(tms_centroids_set1[sel_refs], copy = True)
        tms_reference_set2 = np.array(tms_centroids_set2[sel_refs], copy = True)

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        rot, trans = imposer.get_rotran()
        rmsd = imposer.get_rms()

        print("RMSD", round(rmsd,2), tm_ranking)
        if rmsd < best_rmsd:
            best_set = comb
            best_rmsd = rmsd

    # Check for possible mirroring error
    # NOTE(review): `error` is counted but only used by the commented-out
    # conditions below; the mirrored alignment is currently always tried.
    test_set2 = np.dot(tms_centroids_set2, rot) + trans
    error = 0
    for i in tm_ranking[3:7]:
        if np.linalg.norm(test_set2[i] - tms_centroids_set1[i]) > 5:
            error += 1

    #if rmsd > 2:
    #if error >= 3 or rmsd > 2:
    if True:
        # Mirror set 2 by negating its z coordinates (in place).
        for i in range(0,len(tms_centroids_set2)):
            tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

        # Align 3D points of set2 with 3D points of set1 using the most stable reference points
        tms_reference_set1 = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]
        tms_reference_set2 = tms_centroids_set2[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        new_rot, new_trans = imposer.get_rotran()
        new_rmsd = imposer.get_rms()
        print("RMSD2", round(new_rmsd,2))

        # Keep the mirrored fit only if it is better; otherwise undo the mirror.
        if new_rmsd < rmsd:
            rot = new_rot
            trans = new_trans
            rmsd = new_rmsd
        else:
            for i in range(0,len(tms_centroids_set2)):
                tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

    # test_set2 = np.dot(tms_reference_set2, rot) + trans
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_reference_set1[i]]), "]")
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in test_set2[i]]), "]")
    #
    # print("############")
    # #test_set2 = np.dot(tms_centroids_set2, rot) + trans
    # test_set2 = np.array(tms_centroids_set2, copy = True)
    # for i in range(0,len(tms_centroids_set1)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set1[i]]), "]")
    # for i in range(0,len(tms_centroids_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set2[i]]), "]")

    # if rmsd > 2:
    #     for i in range(0,len(tms_centroids_set2)):
    #         tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1
    #     # Huge error during alignment of "stable" helices, just use the references not the helper points
    #     tms_reference_set1 = tms_centroids_set1[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     tms_reference_set2 = tms_centroids_set2[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     imposer = SVDSuperimposer()
    #     imposer.set(tms_reference_set1, tms_reference_set2)
    #     imposer.run()
    #     rot, trans = imposer.get_rotran()
    #     rmsd = imposer.get_rms()
    #     print("RMSD3", round(rmsd,2))
    #

    # Apply the chosen rotation/translation to all points of set 2.
    tms_centroids_set2 = np.dot(tms_centroids_set2, rot) + trans
    tms_set2 = np.dot(tms_set2, rot) + trans

    # Calculate optimal plane through points in both sets and convert to 2D
    # Try normal based on TM7
    # tm7_centroids = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] == 6]]
    # if len(tm7_centroids) == 2:
    #     normal = (tm7_centroids[1] - tm7_centroids[0])/np.linalg.norm(tm7_centroids[1] - tm7_centroids[0])
    # else:
    #     # Using TM mid as reference plane
    #     normal, midpoint = calculatePlane(np.concatenate((tms_centroids_set1[7:], tms_centroids_set2[7:])), intracellular)

    # Alternative: use center of helical ends and center of helical middle
    #    normal = tms_centroids_set1[:7].mean(axis=0)  - tms_centroids_set1[7:].mean(axis=0)
    #    normal = normal/np.linalg.norm(normal)

    # 7TM references
    # Collect, per TM, the set-1 centroids of all groups belonging to that TM
    # (the list comprehension is used only for its append side effect).
    tm_centroids = {y:[] for y in range(0,7)}
    [tm_centroids[y].append(tms_centroids_set1[x]) for y in range(0,7) for x in range(0,len(segment_order)) if segment_order[x] == y]
    count = 0
    normal = np.array([0.0,0.0,0.0])
    # Average the unit vectors between the two centroids of each TM that has
    # exactly two of them.
    for y in range(0,7):
        #if len(tm_centroids[y]) == 2 and (mode != 1 or y != 5):
        if len(tm_centroids[y]) == 2:
            normal += np.array((tm_centroids[y][1] - tm_centroids[y][0])/np.linalg.norm(tm_centroids[y][1] - tm_centroids[y][0]))
            count += 1
    # NOTE(review): count is 0 when no TM has exactly two centroids, which
    # makes this a division by zero - confirm that case cannot occur.
    normal = normal/count

    midpoint = tms_centroids_set1[:7].mean(axis=0)

    #plane_set1, z_set1 = convert3D_to_2D_plane(tms_centroids_set1[:7], intracellular, normal, midpoint)
    #plane_set2, z_set2 = convert3D_to_2D_plane(tms_centroids_set2[:7], intracellular, normal, midpoint)
    # Both sets are projected in one call and split again afterwards.
    plane_set, z_set = convert3D_to_2D_plane(np.concatenate((tms_centroids_set1[:7], tms_centroids_set2[:7]), axis = 0), intracellular, normal, midpoint)
    plane_set1 = plane_set[:7]
    plane_set2 = plane_set[7:]
    z_set1 = z_set[:7]
    z_set2 = z_set[7:]

    # DO NOT REMOVE: possibly we want to upgrade to weighted superposing
    # Based on Biopython SVDSuperimposer
    # coords = tms_centroids_set2
    # reference_coords = tms_centroids_set1

    # OLD centroid calculation
    # av1 = sum(coords) / len(coords)
    # av2 = sum(reference_coords) / len(reference_coords)

    # NEW weighted centroid calculation
    # print(normalized_differences)
    # av1, av2 = 0, 0
    # totalweight = 0
    # for i in range(0,7):
    #     # print("Round",i)
    #     #weight = 1+(7-tm_ranking.index(i))/7
    #     weight = (1-normalized_differences[i]+0.1)/1.1
    #     totalweight += weight
    #     print("TM", str(i+1), "weight",weight)
    #     av1 += coords[i]*weight
    #     av2 += reference_coords[i]*weight
    #
    # av1 = av1/totalweight
    # av2 = av2/totalweight
    #
    # coords = coords - av1
    # reference_coords = reference_coords - av2
    #
    # # correlation matrix
    # a = np.dot(np.transpose(coords), reference_coords)
    # u, d, vt = np.linalg.svd(a)
    # rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # # check if we have found a reflection
    # if np.linalg.det(rot) < 0:
    #     vt[2] = -vt[2]
    #     rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # trans = av2 - np.dot(av1, rot)
    # rot, trans = imposer.get_rotran()
    # tms_set2 = np.dot(tms_set2, rot) + trans

    # CURRENT: Ca-angle to axis core
    rotations = [0] * 7
    for i in range(0,7):
        try:
            # rotations[i] = [data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1] if abs(data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1]) < 180 else -1*data['tab4'][gn_dictionary[x]]['angles_set2'][1]-data['tab4'][gn_dictionary[x]]['angles_set1'][1] for x in gns[i]]
            # Entry 11 of each angle list, shifted by +360 when not positive.
            angles1 = [data['tab4'][gn_dictionary[x]]['angles_set1'][11] for x in gns[i]]
            angles1 = [angle if angle > 0 else angle + 360 for angle in angles1 ]
            angles2 = [data['tab4'][gn_dictionary[x]]['angles_set2'][11] for x in gns[i]]
            angles2 = [angle if angle > 0 else angle + 360 for angle in angles2 ]

            # Per-residue difference, wrapped into the range [-180, 180].
            rotations[i] = [angles1[x] - angles2[x] for x in range(3)]
            rotations[i] = [value if abs(value) <= 180 else value-360 if value > 0 else value+360 for value in rotations[i]]

            # count=0
            # for x in gns[i]:
            #     print(i, x, data['tab4'][gn_dictionary[x]]['angles_set1'][11], data['tab4'][gn_dictionary[x]]['angles_set2'][11], rotations[i][count])
            #     count += 1

        # NOTE(review): the bare except hides every failure (missing keys,
        # fewer than three GNs in gns[i], ...) and falls back to no rotation.
        except:
            rotations[i] = [0.0, 0.0, 0.0]  # TODO: verify other class B errors

        # UPDATE 20-02-2020 No mirroring but top-down through GPCR
        # Mean of the three per-residue differences.
        rotations[i] = sum(rotations[i])/3
        # if intracellular:
        #     rotations[i] = -1*sum(rotations[i])/3
        # else:
        #     rotations[i] = sum(rotations[i])/3


    # ALTERNATIVE: utilize TM tip alignment (needs debugging as some angles seem off, e.g. GLP-1 active vs inactive TM2)
    # Add rotation angle based on TM point placement
    # tms_2d_set1, junk = convert3D_to_2D_plane(tms_set1, intracellular, normal, midpoint)
    # tms_2d_set2, junk = convert3D_to_2D_plane(tms_set2, intracellular, normal, midpoint)

    # rotations = [0] * 7
    # for i in range(0,7):
    #     positions = [x for x in range(0, len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
    #     turn_set1 = tms_2d_set1[positions]
    #     turn_set2 = tms_2d_set2[positions]
    #
    #     # set to middle
    #     turn_set1 = turn_set1 - turn_set1.mean(axis=0)
    #     turn_set2 = turn_set2 - turn_set2.mean(axis=0)
    #
    #     # Calculate shift per residue and take average for this TM
    #     for j in range(0,len(turn_set1)):
    #         v1 = turn_set1[j]/np.linalg.norm(turn_set1[j])
    #         v2 = turn_set2[j]/np.linalg.norm(turn_set2[j])
    #         angle = np.degrees(np.arctan2(v2[1], v2[0]) - np.arctan2(v1[1],v1[0]))
    #
    #         if abs(angle) > 180:
    #             angle = 360 - abs(angle)
    #
    #         rotations[i] += angle/len(turn_set1)

    # TODO: check z-coordinates orientation
    # Step 1: collect movement relative to membrane mid
    # Step 2: find min and max TM
    # Step 3: check if orientation of min/max TM matches the z-scales + intra/extra - if not invert z-coordinates
    # Set 1 is the reference and therefore always reported with rotation 0.
    labeled_set1 = [{"label": "TM"+str(i+1), "x": float(plane_set1[i][0]), "y": float(plane_set1[i][1]), "z": float(z_set1[i]), "rotation" : 0} for i in range(0,7)]
    labeled_set2 = [{"label": "TM"+str(i+1), "x": float(plane_set2[i][0]), "y": float(plane_set2[i][1]), "z": float(z_set2[i]), "rotation" : rotations[i]} for i in range(0,7)]

    # Convert used GNs to right numbering
    # NOTE(review): gns[:] is a shallow copy, so the inner lists of gns are
    # renumbered in place as well; harmless here because gns is not used again.
    gns_used = gns[:]
    for i in range(0,len(gns)):
        for j in range(0,len(gns[i])):
            gns_used[i][j] = gn_dictionary[gns[i][j]]
    return {"coordinates_set1" : labeled_set1, "coordinates_set2": labeled_set2, "gns_used": gns_used}
Пример #20
0
def _iter_standard_residues(model):
    """Yield ``(chain_id, key, residue)`` for each non-hetero residue of ``model``.

    ``key`` is the residue sequence number followed by the chain id; it is
    the label ``calc_DockQ`` uses to match residues between structures and to
    look residues up in the interface returned by ``parse_fnat``.
    """
    for chain in model:
        chain_id = chain.id
        for residue in chain:
            if residue.get_id()[0] != ' ':  # Skip hetatm.
                continue
            yield chain_id, str(residue.get_id()[1]) + chain_id, residue


def calc_DockQ(model, native, use_CA_only=False, capri_peptide=False):
    """Compute the DockQ score of a docking model against its native complex.

    The ``fnat`` executable located next to the running script
    (``sys.argv[0]``) is run twice, once for the native-contact statistics
    and once for the interface residue list; both outputs are parsed with
    ``parse_fnat``.  The model is then superimposed on the native:

    * on the shared interface atoms, giving ``irms``;
    * on the receptor chain, after which the RMS deviation of the ligand
      chain is taken without further fitting, giving ``Lrms``.

    The chain with more matched atoms is treated as the receptor.

    Parameters:
        model: path of the model PDB file.
        native: path of the native (reference) PDB file.
        use_CA_only: superimpose on ``CA`` atoms only instead of the
            backbone atoms ``CA``, ``C``, ``N`` and ``O``.
        capri_peptide: pass ``4 -all`` / ``8 -cb`` to ``fnat`` instead of
            ``5 -all`` / ``10 -all`` (presumably the peptide distance
            cutoffs -- TODO confirm against the ``fnat`` tool).

    Returns:
        dict with keys ``DockQ``, ``irms``, ``Lrms``, ``fnat``,
        ``nat_correct``, ``nat_total``, ``fnonnat``, ``nonnat_count``,
        ``model_total``, ``chain1``, ``chain2``, ``len1``, ``len2``,
        ``class1`` and ``class2``.

    Raises:
        AssertionError: if ``fnat`` output could not be parsed or the atom
            selections of model and native are empty or differ in size.
        ValueError: if the model does not contain exactly two chains.
    """
    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA'] if use_CA_only else ['CA', 'C', 'N', 'O']

    # NOTE(review): the commands are plain shell strings, so paths containing
    # spaces or shell metacharacters will not survive; kept as-is because the
    # command text is also part of the assertion messages below.
    if capri_peptide:
        cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 4 -all'
        cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 8 -cb'
    else:
        cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5 -all'
        cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10 -all'

    # Close the pipes explicitly instead of leaving them to the garbage
    # collector.
    with os.popen(cmd_fnat) as pipe:
        fnat_out = pipe.read()
    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total,
     interface5A) = parse_fnat(fnat_out)
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)

    with os.popen(cmd_interface) as pipe:
        inter_out = pipe.read()
    (fnat_bb, nat_correct_bb, nat_total_bb, fnonnat_bb, nonnat_count_bb,
     model_total_bb, interface) = parse_fnat(inter_out)
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    # Only the first model of each PDB file is used.
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
    ref_model = pdb_parser.get_structure("reference", native)[0]
    sample_model = pdb_parser.get_structure("model", model)[0]

    # Atoms present in the model ...
    atoms_def_sample = set()
    for _, key, sample_res in _iter_standard_residues(sample_model):
        for a in atom_for_sup:
            if a in sample_res:
                atom_key = key + '.' + a
                if atom_key in atoms_def_sample:
                    print(atom_key + ' already added (MODEL)!!!')
                atoms_def_sample.add(atom_key)

    # ... and the subset that is also present in the native.
    atoms_def_in_both = set()
    for _, key, ref_res in _iter_standard_residues(ref_model):
        for a in atom_for_sup:
            atom_key = key + '.' + a
            if a in ref_res and atom_key in atoms_def_sample:
                if atom_key in atoms_def_in_both:
                    print(atom_key + ' already added (Native)!!!')
                atoms_def_in_both.add(atom_key)

    # Model residues per chain, plus the model atoms of interface residues.
    chain_res = {sample_chain.id: [] for sample_chain in sample_model}
    sample_atoms = []
    common_interface = set()
    for chain, key, sample_res in _iter_standard_residues(sample_model):
        chain_res[chain].append(key)
        if key in interface:
            for a in atom_for_sup:
                if a in sample_res and key + '.' + a in atoms_def_in_both:
                    sample_atoms.append(sample_res[a])
            common_interface.add(key)

    # Native atoms per chain for residues also found in the model, plus the
    # native atoms of the shared interface residues.
    sample_keys = {chain: set(keys) for chain, keys in chain_res.items()}
    chain_ref = {ref_chain.id: [] for ref_chain in ref_model}
    ref_atoms = []
    common_residues = set()
    for chain, key, ref_res in _iter_standard_residues(ref_model):
        # .get(): a chain that exists only in the native has no model
        # residues to match and must not raise KeyError.
        if key in sample_keys.get(chain, ()):
            for a in atom_for_sup:
                if a in ref_res and key + '.' + a in atoms_def_in_both:
                    chain_ref[chain].append(ref_res[a])
                    common_residues.add(key)
        if key in common_interface:
            for a in atom_for_sup:
                if a in ref_res and key + '.' + a in atoms_def_in_both:
                    ref_atoms.append(ref_res[a])

    # Model atoms per chain, restricted to residues present in the native.
    chain_sample = {sample_chain.id: [] for sample_chain in sample_model}
    for chain, key, sample_res in _iter_standard_residues(sample_model):
        if key in common_residues:
            for a in atom_for_sup:
                if a in sample_res and key + '.' + a in atoms_def_in_both:
                    chain_sample[chain].append(sample_res[a])

    assert len(ref_atoms) != 0, "length of native is zero"
    assert len(sample_atoms) != 0, "length of model is zero"
    assert len(ref_atoms) == len(
        sample_atoms
    ), "Different number of atoms in native and model %d %d\n" % (
        len(ref_atoms), len(sample_atoms))

    # Interface RMSD: superimpose the model on the native interface atoms.
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())
    irms = super_imposer.rms

    chain_ids = list(chain_sample)
    if len(chain_ids) != 2:
        raise ValueError(
            "Expected exactly two chains in the model, found %d: %r" %
            (len(chain_ids), chain_ids))
    chain1, chain2 = chain_ids

    ligand_chain = chain1
    receptor_chain = chain2
    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])

    assert len1 != 0, "%s chain has zero length!\n" % chain1
    assert len2 != 0, "%s chain has zero length!\n" % chain2

    # The chain with more matched atoms is the receptor.
    class1 = 'ligand'
    class2 = 'receptor'
    if (len(chain_sample[chain1]) > len(chain_sample[chain2])):
        receptor_chain = chain1
        ligand_chain = chain2
        class1 = 'receptor'
        class2 = 'ligand'

    # Set to align on receptor
    assert len(chain_ref[receptor_chain]) == len(
        chain_sample[receptor_chain]
    ), "Different number of atoms in native and model receptor (chain %c) %d %d\n" % (
        receptor_chain, len(
            chain_ref[receptor_chain]), len(chain_sample[receptor_chain]))

    super_imposer.set_atoms(chain_ref[receptor_chain],
                            chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())

    assert len(chain_ref[ligand_chain]) != 0 or len(
        chain_sample[ligand_chain]
    ) != 0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (
        ligand_chain, len(
            chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    assert len(chain_ref[ligand_chain]) == len(
        chain_sample[ligand_chain]
    ), "Different number of atoms in native and model ligand (chain %c) %d %d\n" % (
        ligand_chain, len(
            chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    # Ligand coordinates after the receptor-based superposition.
    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])

    # The private _rms function is used because it does not superimpose.
    sup = SVDSuperimposer()
    Lrms = sup._rms(coord1, coord2)

    DockQ = (float(fnat) + 1 / (1 + (irms / 1.5) * (irms / 1.5)) + 1 /
             (1 + (Lrms / 8.5) * (Lrms / 8.5))) / 3

    return {
        'DockQ': DockQ,
        'irms': irms,
        'Lrms': Lrms,
        'fnat': fnat,
        'nat_correct': nat_correct,
        'nat_total': nat_total,
        'fnonnat': fnonnat,
        'nonnat_count': nonnat_count,
        'model_total': model_total,
        'chain1': chain1,
        'chain2': chain2,
        'len1': len1,
        'len2': len2,
        'class1': class1,
        'class2': class2,
    }
Пример #21
0
def run_system(dir):
    """Compute the per-frame RMSD of a trajectory to its reference coordinates.

    All work happens inside ``dir``.  The trajectory is read from the
    ``"coord"`` dataset of ``coord.h5`` and the reference from
    ``../../cmap_coordinates/<pdb>.txt``, where ``<pdb>`` is the part of the
    directory's base name before the first underscore.  Every frame is
    centred and superimposed on the centred reference; the resulting RMSD
    values are written to ``RMSD.txt``.  A summary line is printed with the
    mean RMSD of the last 200 frames and their mean radius of gyration
    relative to that of the reference.

    Nothing is computed (a message is printed instead) when either input
    file is missing, or when ``RMSD.txt`` already exists and the module-level
    ``_FORCE`` flag is false.

    The original working directory is restored on every exit path,
    including when an exception is raised.

    Parameters:
        dir: path of the system directory to process.

    Returns:
        ``dir``, unchanged.
    """
    pdb = os.path.basename(dir).split('_')[0]

    org_dir = os.getcwd()
    os.chdir(dir)
    try:
        f_coord = "coord.h5"
        f_RMSD = "RMSD.txt"
        f_OC = os.path.join("..", "..", "cmap_coordinates", pdb + '.txt')

        if not os.path.exists(f_OC):
            print("Missing coordinates for cmap {}".format(dir))
            return dir

        if not os.path.exists(f_coord):
            print("Missing coordinates, extract_coordinates.py first {}".format(dir))
            return dir

        if os.path.exists(f_RMSD) and not _FORCE:
            print("RMSD file exists, skipping {}".format(dir))
            return dir

        # The context manager closes the file even if the read fails.
        with h5py.File(f_coord, 'r') as h5:
            C = h5["coord"][:]
        OC = np.loadtxt(f_OC)

        assert(C[0].shape == OC.shape)

        sup = SVDSuperimposer()

        # Centre the reference and take its radius of gyration.
        OC -= OC.mean(axis=0)
        OC_RG = ((np.linalg.norm(OC, axis=1)**2).sum() / len(OC)) ** 0.5

        RMSD = []
        RG = []
        for cx in C:
            cx -= cx.mean(axis=0)
            RG.append(((np.linalg.norm(cx, axis=1)**2).sum() / len(cx)) ** 0.5)

            sup.set(OC, cx)
            sup.run()
            RMSD.append(sup.get_rms())

        RMSD = np.array(RMSD)
        RG = np.array(RG)

        print("{} {: 0.4f} {: 0.4f}".format(dir, RMSD[-200:].mean(),
                                          RG[-200:].mean() / OC_RG))

        np.savetxt(f_RMSD, RMSD)
    finally:
        os.chdir(org_dir)

    return dir
Пример #22
0
def compute_centroid(reader,
                     mean_structure,
                     indexes,
                     num_confs,
                     start=None,
                     stop=None):
    """
        Compares each structure to the mean and returns the one with the lowest RMSF

        Every configuration is superimposed on the mean structure using only the
        particles selected by indexes; the resulting transform is applied to all
        positions and to the a1/a3 orientation vectors.

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): The positions of the indexed particles in the mean configuration.
            indexes: Index selection applied to each configuration's positions before fitting.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): The starting configuration ID to begin averaging at.  Used if parallel.
            <optional> stop (int): The configuration ID on which to end the averaging.  Used if parallel.

        Returns:
            centroid_candidate (numpy.array): The aligned positions of the structure with the lowest RMSF to the mean.
            centroid_a1 (numpy.array): The rotated a1 vectors of that structure.
            centroid_a3 (numpy.array): The rotated a3 vectors of that structure.
            lowest_rmsf (float): The RMSF of that structure to the mean.
            centroid_t: The time of that structure, or None if no structure was selected.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)
    confid = 0

    # Use the single-value decomposition method for superimposing configurations
    sup = SVDSuperimposer()
    lowest_rmsf = 100000  #if you have a larger number than this, we need to talk...
    centroid_candidate = np.zeros_like(mean_structure)
    centroid_a1 = np.zeros_like(mean_structure)
    centroid_a3 = np.zeros_like(mean_structure)
    # Defined up front so the return below cannot hit an unbound name when
    # the reader yields no configuration (or none beats the sentinel).
    centroid_t = None

    mysystem = reader.read(n_skip=start)

    while mysystem != False and confid < stop:
        mysystem.inbox()

        # calculate alignment transform on the indexed particles only
        sup.set(mean_structure, mysystem.positions[indexes])
        sup.run()
        rot, tran = sup.get_rotran()

        # apply it to everything; orientation vectors are only rotated
        cur_conf = np.einsum('ij, ki -> kj', rot, mysystem.positions) + tran
        cur_conf_a1 = np.einsum('ij, ki -> kj', rot, mysystem.a1s)
        cur_conf_a3 = np.einsum('ij, ki -> kj', rot, mysystem.a3s)
        RMSF = sup.get_rms()
        print("Frame number:", confid, "RMSF:", RMSF)
        if RMSF < lowest_rmsf:
            centroid_candidate = cur_conf
            centroid_a1 = cur_conf_a1
            centroid_a3 = cur_conf_a3
            lowest_rmsf = RMSF
            centroid_t = mysystem.time

        confid += 1
        mysystem = reader.read()

    return centroid_candidate, centroid_a1, centroid_a3, lowest_rmsf, centroid_t
except ImportError:
    from Bio import MissingPythonDependencyError
    raise MissingPythonDependencyError(
        "Install NumPy if you want to use Bio.SVDSuperimposer.")

from Bio.SVDSuperimposer import SVDSuperimposer

# Start with two coordinate sets: 4x3 arrays created with typecode 'f'.

x = array([[51.65, -1.90, 50.07], [50.40, -1.23, 50.65], [50.68, -0.04, 51.54],
           [50.22, -0.02, 52.85]], 'f')

y = array([[51.30, -2.99, 46.54], [51.09, -1.88, 47.58], [52.36, -1.20, 48.03],
           [52.71, -1.18, 49.38]], 'f')

sup = SVDSuperimposer()

# Set the coordinates: x is the reference,
# y will be rotated and translated onto x.
sup.set(x, y)

# Do the least-squares fit.
sup.run()

# Get the RMSD of the fit.
rms = sup.get_rms()

# Get the rotation (right multiplying!) and the translation.
rot, tran = sup.get_rotran()

# rotate y on x manually
Пример #24
0
def compute_mean(reader, align_conf, indexes, num_confs, start=None, stop=None):
    """
        Sums the aligned configurations of a trajectory so a mean structure can be formed

        Each configuration is fitted to align_conf using only the particles
        selected by indexes; the fitted transform is then applied to every
        position and to the a1/a3 orientation vectors before they are added to
        the running sums.

        Structured to work with the multiprocessing process from UTILS/parallelize.py

        Parameters:
            reader (readers.ErikReader): An active reader on the trajectory file to take the mean of.
            align_conf (numpy.array): The positions of the indexed particles in the reference configuration.
            indexes: Index selection applied to each configuration's positions before fitting.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): The starting configuration ID to begin averaging at.  Used if parallel.
            <optional> stop (int): The configuration ID on which to end the averaging.  Used if parallel.

        Returns:
            mean_pos_storage (numpy.array): For each particle, the sum of positions in all configurations read.
            mean_a1_storage (numpy.array): For each particle, the sum of a1 orientation vectors in all configurations read.
            mean_a3_storage (numpy.array): For each particle, the sum of a3 orientation vectors in all configurations read.
            intermediate_mean_structures (list): mean structures computed periodically during the summing to check decorrelation.
            confid (int): the number of configurations summed for the storage arrays.
    """
    stop = num_confs if stop is None else int(stop)
    # A run counts as serial exactly when no start index was supplied.
    parallel = start is not None
    start = int(start) if parallel else 0

    mysystem = reader.read(n_skip=start)
    n_nuc = len(mysystem.positions)

    if not parallel:
        # Serial runs also snapshot the mean every num_confs/10
        # configurations to check decorrelation.
        intermediate_every = np.floor(num_confs / 10)

    # storage for the intermediate mean structures
    intermediate_mean_structures = []

    # the class doing the alignment of 2 structures
    sup = SVDSuperimposer()

    storage_shape = (n_nuc, 3)
    mean_pos_storage = np.zeros(storage_shape)
    mean_a1_storage = np.zeros(storage_shape)
    mean_a3_storage = np.zeros(storage_shape)

    # number of configurations accumulated so far
    confid = 0

    while mysystem != False and confid < stop:
        mysystem.inbox()

        # fit on the indexed particles only
        sup.set(align_conf, mysystem.positions[indexes])
        sup.run()
        rot, tran = sup.get_rotran()

        # move the whole configuration; orientation vectors are only rotated
        mysystem.positions = np.einsum('ij, ki -> kj', rot,
                                       mysystem.positions) + tran
        mysystem.a1s = np.einsum('ij, ki -> kj', rot, mysystem.a1s)
        mysystem.a3s = np.einsum('ij, ki -> kj', rot, mysystem.a3s)

        mean_pos_storage += mysystem.positions
        mean_a1_storage += mysystem.a1s
        mean_a3_storage += mysystem.a3s

        # report the quality of this frame's fit
        print("Frame:", confid, "Time:", mysystem.time, "RMSF:", sup.get_rms())

        confid += 1
        mysystem = reader.read()

        # Intermediate means can't be produced neatly in parallel.
        if not parallel and confid % intermediate_every == 0:
            running_mean = mean_pos_storage / confid
            intermediate_mean_structures.append(
                prep_pos_for_json(running_mean))
            print("INFO: Calculated intermediate mean for {} ".format(confid))

    return (mean_pos_storage, mean_a1_storage, mean_a3_storage,
            intermediate_mean_structures, confid)
Пример #25
0
class ResidueMutator(object):
    """Replace a non-standard or incomplete residue by a standard template.

    Template residues are taken from tripeptide structures: for every model
    in each tripeptide file, the residue with id 2 of chain ``" "`` is stored
    as a candidate under the residue name found in the first model.
    """

    def __init__(self,
                 tripeptides=None,
                 components=None,
                 standard_residues=None):
        """ The mutator object takes a non-standard residue or incomplete residue and modifies it

        Parameters:
            tripeptides: iterable of tripeptide structure files; defaults to
                ``data.tripeptides``.
            components: mapping from residue name to its chemical component
                record; defaults to ``data.chem_components``.
            standard_residues: container of standard residue names; defaults
                to ``data.standard_residues``.
        """
        # get defaults if not provided
        if standard_residues is None:
            standard_residues = data.standard_residues
        if tripeptides is None:
            tripeptides = data.tripeptides
        if components is None:
            components = data.chem_components
        self.components = components
        self.candidates = {}
        self.standard_residues = standard_residues
        self.imposer = SVDSuperimposer()
        self.parser = PDBParser(PERMISSIVE=1, QUIET=True)

        # build up candidate structures
        for fn in tripeptides:
            structure = self.parser.get_structure("", fn)
            resn = structure[0][" "][2].get_resname()
            self.candidates[resn] = [model[" "][2] for model in structure]

    def mutate(self, residue, repair=False):
        """Return a standard replacement for ``residue``.

        The candidate of the parent residue type whose shared side chain
        atoms fit ``residue`` best is copied, transformed onto ``residue``,
        stripped of hydrogens, and given the main chain atom coordinates of
        ``residue``.

        Parameters:
            residue: the residue to replace.
            repair: when true, ``residue`` is rebuilt from candidates of its
                own residue name instead of its standard parent.

        Returns:
            ``residue`` itself if it is already standard (and ``repair`` is
            false); the fitted candidate copy on success; ``False`` if the
            parent is not a standard residue, has no candidates, or no
            candidate produced a usable fit.
        """
        resn = residue.get_resname()
        if repair:
            # use residue as its own parent
            parn = resn
        else:
            if self.standard(resn):
                # the residue is already a standard residue, do not need to mutate.
                return residue
            parn = self.components[resn]['_chem_comp.mon_nstd_parent_comp_id']
            if not self.standard(parn):
                # the parent residue is a nonstandard residue, can't mutate
                return False

        if parn not in self.candidates:
            # parent not in candidate structures
            return False

        # side chain atoms shared by the fixed residue and its standard
        # parent, restricted to those actually present in the residue
        sc_fixed = set(self.components[resn]['side_chain_atoms'])
        sc_movin = set(self.components[parn]['side_chain_atoms'])
        atom_list = [
            atom for atom in sc_fixed.intersection(sc_movin)
            if atom in residue
        ]

        # get side chain atom coordinates
        fixed_coord = np.zeros((len(atom_list), 3))
        for i, atom in enumerate(atom_list):
            fixed_coord[i] = residue[atom].get_coord()

        # loop over candidates, finding best RMSD
        moved_coord = np.zeros((len(atom_list), 3))
        min_rms = 99999
        rotm = None
        tran = None
        min_candidate = None
        for candidate in self.candidates[parn]:
            for j, atom in enumerate(atom_list):
                moved_coord[j] = candidate[atom].get_coord()
            # perform SVD fitting
            self.imposer.set(fixed_coord, moved_coord)
            self.imposer.run()
            rms = self.imposer.get_rms()
            if rms < min_rms:
                min_rms = rms
                rotm, tran = self.imposer.get_rotran()
                min_candidate = candidate

        if min_candidate is None:
            # no candidate gave a usable fit; report failure like the other
            # "can't mutate" paths instead of crashing on None.copy()
            return False

        # copy the candidate to a new object
        candidate = min_candidate.copy()
        candidate.transform(rotm, tran)
        stripHydrogens(candidate)

        # replace backbone atoms of candidate
        for atom in self.components[resn]['main_chain_atoms']:
            if atom not in residue:
                continue
            if atom not in candidate:
                candidate.add(residue[atom].copy())
            candidate[atom].set_coord(residue[atom].get_coord())

        return candidate

    def standard(self, resname):
        """Return whether ``resname`` is one of the standard residues."""
        return (resname in self.standard_residues)

    def modified(self, resname):
        """Return whether ``resname`` is a non-standard residue whose
        recorded parent component is a standard residue."""
        if resname in self.standard_residues:
            # it's standard, not modified
            return False

        parent_field = '_chem_comp.mon_nstd_parent_comp_id'
        if (resname in self.components
                and parent_field in self.components[resname]):
            return (self.components[resname][parent_field]
                    in self.standard_residues)

        # has no standard parent field - can't be modified
        return False
Пример #26
0
def merge_cc(coords_list, res_overlap, n_cc_helices):
    """Chain coiled-coil segments into one structure by overlap fitting.

    Each entry of ``coords_list`` holds the coordinates of ``n_cc_helices``
    helices stored one after another, five atoms per residue.  Every segment
    after the first is superimposed on its (already aligned) predecessor,
    fitting the first ``overlap`` residues of each of its helices onto the
    last ``overlap`` residues of the predecessor's helices.  In the merged
    result the overlapping atoms are blended linearly from the previous
    segment to the next, and the remainder of the next segment is appended.

    Parameters:
        coords_list: sequence of coordinate arrays, one per segment.
        res_overlap: number of overlapping residues for each consecutive
            pair of segments.
        n_cc_helices: number of helices in every segment.

    Returns:
        (res_dimer, rmsd): the merged coordinates with the helices stored
        one after another, and the square root of the summed squared fit
        RMS values of all overlaps.
    """
    n_atoms_per_res = 5
    ref_coords = coords_list[0]
    n_atoms_mono = int(ref_coords.shape[0] / n_cc_helices)
    helix_ids = range(n_cc_helices)

    # Pass 1: superimpose every segment on the one before it.
    aligned_coords = [deepcopy(ref_coords)]
    msds = []
    for coords, cc_overlap in zip(coords_list[1:], res_overlap):
        n_atoms_overlap = cc_overlap * n_atoms_per_res

        # tail of each helix in the reference segment
        ref_atoms = np.concatenate(
            [ref_coords[(i + 1) * n_atoms_mono - n_atoms_overlap:
                        (i + 1) * n_atoms_mono] for i in helix_ids],
            axis=0)
        # head of each helix in the segment being fitted
        sup_atoms = np.concatenate(
            [coords[i * n_atoms_mono:i * n_atoms_mono + n_atoms_overlap]
             for i in helix_ids],
            axis=0)

        sup = SVDSuperimposer()
        sup.set(ref_atoms, sup_atoms)
        sup.run()
        msds.append(sup.get_rms()**2)
        rot, tran = sup.get_rotran()

        ref_coords = np.dot(coords, rot) + tran
        aligned_coords.append(ref_coords)

    rmsd = np.sqrt(np.sum(msds))

    # Pass 2: stitch the aligned segments together helix by helix.
    hi_all = [
        aligned_coords[0][i * n_atoms_mono:(i + 1) * n_atoms_mono]
        for i in helix_ids
    ]

    for coords, cc_overlap in zip(aligned_coords[1:], res_overlap):
        hi = [coords[i * n_atoms_mono:(i + 1) * n_atoms_mono]
              for i in helix_ids]
        n_atoms_overlap = cc_overlap * n_atoms_per_res

        # Blend the overlap atom by atom; all atoms of one residue share
        # the same weight, which grows along the overlap.
        for ind_shift in range(n_atoms_overlap):
            ind_overlap = ind_shift // n_atoms_per_res
            weight = (ind_overlap + 1) / float(cc_overlap + 1)
            row = -n_atoms_overlap + ind_shift
            for i in helix_ids:
                coordi_prev = hi_all[i][row]
                coordi_next = hi[i][ind_shift]
                hi_all[i][row] = (1 - weight) * coordi_prev + \
                    weight * coordi_next

        # extend every helix with the non-overlapping part of the segment
        for i in helix_ids:
            hi_all[i] = np.append(hi_all[i], hi[i][n_atoms_overlap:], axis=0)

    res_dimer = np.concatenate(hi_all, axis=0)

    return res_dimer, rmsd