Example #1
import sys
import logging

import numpy as np

# NOTE: "log" and the Cython helper "LoopDistances" are module-level names in
# the original package; the logger below is only a stand-in.
log = logging.getLogger(__name__)

def distmatrix(uni, idxs, chosenselections, co, mindist=False, mindist_mode=None, type1char='p', type2char='n'):
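    """Build a persistence matrix over the trajectory of Universe "uni":
    for every pair of selections in "chosenselections", the percentage of
    frames in which the pair is counted as in contact by the LoopDistances
    helpers, using the distance cutoff "co".  With mindist=False the centers
    of mass of the selections are used; with mindist=True minimum atom-atom
    distances are used instead, and "mindist_mode" ("diff", "same" or "both")
    chooses which group types (as labelled by the last character of
    idxs[i][3], type1char or type2char) are compared.  Returns
    (final_percmat, distmats); distmats is currently always empty.
    """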
    numframes = uni.trajectory.numframes
    final_percmat = np.zeros((len(chosenselections),len(chosenselections)))
    log.info("Distance matrix will be %dx%d (%d elements)" % (len(idxs),len(idxs),len(idxs)**2))
    a=1
    distmats=[]

    if mindist:
        P = []
        N = []
        Pidxs = []
        Nidxs = []
        Psizes = []
        Nsizes = []

        for i in range(len(idxs)):
            if idxs[i][3][-1] == type1char:
                P.append(chosenselections[i])
                Pidxs.append(idxs[i])
                Psizes.append(len(chosenselections[i]))
            elif idxs[i][3][-1] == type2char:
                N.append(chosenselections[i])
                Nidxs.append(idxs[i])
                Nsizes.append(len(chosenselections[i]))
            else: 
                raise ValueError("selection type %s does not end with '%s' or '%s'" % (idxs[i][3], type1char, type2char))

        Nsizes = np.array(Nsizes, dtype=int)
        Psizes = np.array(Psizes, dtype=int)

        if mindist_mode == "diff":
            # distances between groups of different type only
            sets = [(P, N)]
            sets_idxs = [(Pidxs, Nidxs)]
            sets_sizes = [(Psizes, Nsizes)]
        elif mindist_mode == "same":
            # distances within groups of the same type
            sets = [(P, P), (N, N)]
            sets_idxs = [(Pidxs, Pidxs), (Nidxs, Nidxs)]
            sets_sizes = [(Psizes, Psizes), (Nsizes, Nsizes)]
        elif mindist_mode == "both":
            # distances between all selections, regardless of type
            sets = [(chosenselections, chosenselections)]
            sets_idxs = [(idxs, idxs)]
            sizes = [len(s) for s in chosenselections]
            sets_sizes = [(sizes, sizes)]
        else:
            raise ValueError("unknown mindist_mode: %s" % mindist_mode)

        percmats = []
        coords = []
        for s in sets:
            coords.append([[],[]])

        for ts in uni.trajectory:
            sys.stdout.write( "Caching coordinates: frame %d / %d (%3.1f%%)\r" % (a,numframes,float(a)/float(numframes)*100.0) )
            sys.stdout.flush()
            a+=1
            for si,s in enumerate(sets):
                if s[0] is s[1]: # triangular case
                    log.info("Caching coordinates...")
                    for group in s[0]:
                        coords[si][0].append(group.coordinates())
                        coords[si][1].append(group.coordinates())
                else: # square case
                    log.info("Caching coordinates...")
                    for group in s[0]:
                        coords[si][0].append(group.coordinates())
                    for group in s[1]:
                        coords[si][1].append(group.coordinates())

        for si,s in enumerate(sets): # run the distance loops for each set of groups
            if s[0] is s[1]:
                this_coords = np.array(np.concatenate(coords[si][0]),dtype=np.float64)

                inner_loop = LoopDistances(this_coords, this_coords, co)
                percmats.append(inner_loop.run_triangular_mindist(sets_sizes[si][0]))

            else:
                this_coords1 = np.array(np.concatenate(coords[si][0]),dtype=np.float64)
                this_coords2 = np.array(np.concatenate(coords[si][1]),dtype=np.float64)
                
                inner_loop = LoopDistances(this_coords1, this_coords2, co)

                percmats.append( inner_loop.run_square_mindist(sets_sizes[si][0], sets_sizes[si][1]))

        for si,s in enumerate(sets): # recover the final matrix
            Pidxs = sets_idxs[si][0]
            Nidxs = sets_idxs[si][1]
            if s[0] is s[1]: # triangular case
                for j in range(len(s[0])):
                    for k in range(0,j):
                        final_percmat[idxs.index(Pidxs[j]), idxs.index(Pidxs[k])] = percmats[si][j,k]
                        final_percmat[idxs.index(Pidxs[k]), idxs.index(Pidxs[j])] = percmats[si][j,k]
            else: # square case
                for j in range(len(s[0])):
                    for k in range(len(s[1])):
                        final_percmat[idxs.index(Pidxs[j]), idxs.index(Nidxs[k])] = percmats[si][j,k]
                        final_percmat[idxs.index(Nidxs[k]), idxs.index(Pidxs[j])] = percmats[si][j,k]
 
        final_percmat = np.array(final_percmat, dtype=float) / numframes * 100.0
                     
    else:
        all_coms = []
        for ts in uni.trajectory:
            sys.stdout.write( "now analyzing: frame %d / %d (%3.1f%%)\r" % (a,numframes,float(a)/float(numframes)*100.0) )
            sys.stdout.flush()
            a+=1
            coms = np.zeros([len(chosenselections),3])
            for j in range(len(chosenselections)):
                coms[j,:] = chosenselections[j].centerOfMass()
            all_coms.append(coms)

        all_coms = np.concatenate(all_coms)
        inner_loop = LoopDistances(all_coms, all_coms, co)
        # count, for every pair of selections, the frames in which their
        # centers of mass fall within the cutoff "co"
        percmat = inner_loop.run_triangular_distmatrix(len(chosenselections))
        final_percmat = np.array(percmat, dtype=float) / numframes * 100.0


    return (final_percmat,distmats)
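A minimal usage sketch for distmatrix, assuming the function is available in the current module together with the LoopDistances helper, and using the old-style MDAnalysis API that the code above relies on. The file names, the per-residue selections, the idxs tuple layout and the 5.0 Å cutoff are illustrative placeholders, not values prescribed by the original code:

import MDAnalysis as mda

# hypothetical topology/trajectory files
uni = mda.Universe("topol.gro", "traj.xtc")

# one selection per residue; idxs mirrors the records expected by the
# mindist branch, where the trailing character of the label at index 3
# encodes the group type ('p' or 'n')
chosenselections = [res.atoms for res in uni.residues]
idxs = [(res.segment.name, res.id, "", res.name + "p") for res in uni.residues]

percmat, distmats = distmatrix(uni, idxs, chosenselections, co=5.0)
print(percmat)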
Example #2
import sys
import logging

import numpy as np
import MDAnalysis as mda

# NOTE: "log", "parse_sparse", "loadsys", "calc_potential" and the Cython
# helper "LoopDistances" are module-level names in the original package;
# the logger below is only a stand-in.
log = logging.getLogger(__name__)

def dopotential(kbp_atomlist, residues_list, potential_file, seq_dist_co=0, grof=None, xtcf=None, pdbf=None, uni=None, pdb=None, dofullmatrix=True, kbT=1.0):
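    """Score residue-residue interactions with a knowledge-based statistical
    potential.  For every pair of residues belonging to the same segment and
    at least "seq_dist_co" positions apart in sequence, the four atoms listed
    in "kbp_atomlist" for the corresponding sparse-potential entry are
    selected; their distances are computed for every frame and turned into a
    per-pair score by calc_potential, averaged over the trajectory.  Returns
    a formatted string with the scores larger than 1e-6 in absolute value
    and, if dofullmatrix is True, the full symmetric residue-by-residue score
    matrix.  Note that the residues_list argument is overridden by the
    hard-coded list below.
    """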

    # residues for which the statistical potential is defined: all standard
    # residues except GLY.  This hard-coded list replaces whatever was passed
    # in through the residues_list argument.
    residues_list = ["ALA","ARG","ASN","ASP","CYS","GLN","GLU","HIS","ILE","LEU","LYS","MET","PHE","PRO","SER","THR","TRP","TYR","VAL"]

    log.info("Loading potential definition . . .")
    sparses = parse_sparse(potential_file)
    log.info("Loading input files...")

    if not pdb or not uni:
        if not pdbf or not grof or not xtcf:
            raise ValueError("either pre-loaded pdb and uni objects or the "
                             "pdbf, grof and xtcf file names must be supplied")
        pdb, uni = loadsys(pdbf, grof, xtcf)

    ok_residues = []
    discarded_residues = set()
    residue_pairs = []
    atom_selections = []
    ordered_sparses = []
    numframes = len(uni.trajectory)

    for i in range(len(uni.residues)):
        if uni.residues[i].name in residues_list:
            ok_residues.append(i)
        else:
            discarded_residues.add(uni.residues[i])
            continue
        for j in ok_residues[:-1]:
            ii = i 
            if not (abs(i-j) < seq_dist_co or uni.residues[ii].segment.name != uni.residues[j].segment.name): 
                if uni.residues[j].name < uni.residues[ii].name:
                    ii,j = j,ii
                this_sparse = sparses[uni.residues[ii].name][uni.residues[j].name]
                this_atoms = (kbp_atomlist[uni.residues[ii].name][this_sparse.p1_1],
                              kbp_atomlist[uni.residues[ii].name][this_sparse.p1_2],
                              kbp_atomlist[uni.residues[j].name][this_sparse.p2_1],
                              kbp_atomlist[uni.residues[j].name][this_sparse.p2_2])
                try:
                    selected_atoms = mda.core.AtomGroup.AtomGroup((uni.residues[ii].atoms[uni.residues[ii].atoms.names().index(this_atoms[0])], 
                                      uni.residues[ii].atoms[uni.residues[ii].atoms.names().index(this_atoms[1])],
                                      uni.residues[j].atoms[uni.residues[j].atoms.names().index(this_atoms[2])],
                                      uni.residues[j].atoms[uni.residues[j].atoms.names().index(this_atoms[3])]))
                except Exception:
                    log.warning("could not identify essential atoms for the analysis (%s%s, %s%s)" % ( uni.residues[ii].name, uni.residues[ii].id, uni.residues[j].name, uni.residues[j].id ))
                    continue
                residue_pairs.append((ii,j))
                atom_selections.append(selected_atoms)
                ordered_sparses.append(this_sparse)

    scores = np.zeros((len(residue_pairs)), dtype=float)

    a = 1

    for ts in uni.trajectory:
        tmp_coords = []
        sys.stdout.write( "now analyzing: frame %d / %d (%3.1f%%)\r" % (a,numframes,float(a)/float(numframes)*100.0) )
        sys.stdout.flush()
        a+=1
        for sel in atom_selections:
            tmp_coords.append(sel.coordinates())
        coords = np.array(np.concatenate(tmp_coords), dtype=np.float64)

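        # per-frame distances among the four selected atoms of each residue
        # pair; calc_potential converts them into score contributions that
        # are accumulated across frames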
        inner_loop = LoopDistances(coords, coords, None)

        distances = inner_loop.run_potential_distances(len(atom_selections), 4, 1)

        scores += calc_potential(distances, ordered_sparses, pdb, uni, seq_dist_co, kbT=kbT)
    
    scores /= float(len(uni.trajectory))
    outstr = ""

    for i, s in enumerate(scores):
        if abs(s) > 0.000001:
            res1 = pdb.residues[residue_pairs[i][0]]
            res2 = pdb.residues[residue_pairs[i][1]]
            outstr += "%s-%s%s:%s-%s%s\t%.3f\n" % (res1.segment.name, res1.name, res1.id,
                                                   res2.segment.name, res2.name, res2.id, s)
        
    dm = None
    
    if dofullmatrix:
        dm = np.zeros((len(pdb.residues), len(pdb.residues)))
        for i,k in enumerate(residue_pairs):
            dm[k[0],k[1]] = scores[i]
            dm[k[1],k[0]] = scores[i]           
    
    return (outstr, dm)
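A usage sketch for dopotential under the same assumptions (the function, LoopDistances, parse_sparse, loadsys and calc_potential available at module level, old-style MDAnalysis API). The file paths are placeholders, and the kbp_atomlist mapping below is only a stand-in for the package's real atom-list definition:

# placeholder atom lists: the real mapping must provide, for each residue
# type, the atom names referenced by the sparse potential entries
kbp_atomlist = {res: ["N", "CA", "C", "O"] for res in
                ["ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "HIS",
                 "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR",
                 "TRP", "TYR", "VAL"]}

outstr, dm = dopotential(kbp_atomlist,
                         residues_list=None,           # overridden inside the function
                         potential_file="kbpff.dat",   # hypothetical potential file
                         pdbf="model.pdb", grof="topol.gro", xtcf="traj.xtc",
                         dofullmatrix=True)
print(outstr)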