def permutInverse(n):
    """Return the inverse of a permutation.

    n must contain each integer of range(len(n)) exactly once, so that
    permutInverse(permutInverse(range(4))) equals range(4).

    Raises AssertionError when sorting the result back does not reproduce
    the input, i.e. when the input is not such a permutation.
    """
    perm = Numeric.asarray(n)
    inverse = Numeric.argsort(perm)
    # For a true permutation, argsort applied twice is the identity.
    round_trip = Numeric.argsort(inverse)
    assert Numeric.all(Numeric.equal(perm, round_trip)), \
        "Inverse not successful; input should be permutation of range(len(input))."
    return inverse
def __init__(self, data, sort='binding'):
    """Set up the clusterer for a list of Conformations.

    data -- list of Conformations.
    sort -- which energy is used to order the conformations:
            'docking'        -> conformation.docking_energy
            'intermolecular' -> conformation.intermol_energy
            'flexres'        -> sum of total_energies[ind:], where ind is
                                len(getCoords()) - len(subset) of the first
                                conformation; the sum is stored on every
                                conformation as flexres_energy and ind as
                                flexres_index
            anything else    -> conformation.binding_energy (the default)

    Raises ValueError if sort == 'flexres' and the first conformation has
    no 'subset' attribute.
    """
    self.data = data

    # extract the binding (default) or docking energy
    if sort == 'docking':
        energy_list = [d.docking_energy for d in data]
    elif sort == 'intermolecular':
        energy_list = [d.intermol_energy for d in data]
    elif sort == 'flexres':
        if not hasattr(data[0], 'subset'):
            # __init__ must not return a value: the former
            # 'return "ERROR"' surfaced as an unrelated TypeError.
            raise ValueError(
                "no subsets have been set up for conformations!")
        ind = len(data[0].getCoords()) - len(data[0].subset)
        for d in data:
            d.flexres_energy = Numeric.add.reduce(d.total_energies[ind:])
            d.flexres_index = ind
        energy_list = [d.flexres_energy for d in data]
    else:
        energy_list = [d.binding_energy for d in data]

    # sort the conformations by energy
    self.argsort = Numeric.argsort(energy_list)
    self.energy_used = sort

    # save the pair-wise distances for reuse by get_distance;
    # every entry starts at -1.0
    self.dist_matrix = Numeric.zeros([len(data), len(data)]) - 1.0

    # set the customizable get_distance method to default
    self.set_get_distance(self._get_distance_default)
    self.clustering_dict = {}
def sortPoly(self, order=-1):
    """None <- sortPoly(order=-1)

    Sort the geometry's polygons by the z value of each polygon's
    geometric center (mean of its vertices after multiplication by the
    geometry matrix) and store the result through self.Set().

    Face indices are ordered by ascending center z and then stepped with
    [::order]: order=1 keeps the ascending order, order=-1 reverses it
    (documented originally as "furthest z first" for -1 and "closest z
    first" for 1).

    Face normals are re-ordered the same way only for flat shading and
    when more than one normal is stored; otherwise they are passed through
    unchanged (flat shading) or set to None. Nothing is stored when there
    are no faces.
    """
    if __debug__:
        if hasattr(DejaVu, 'functionName'):
            DejaVu.functionName()

    # FIXME will not work with instance matrices
    mat = Numeric.reshape(self.GetMatrix(), (4, 4))
    vt = self.vertexSet.vertices * mat
    if vt is None:
        return

    triv = Numeric.take(vt, self.faceSet.faces.array)
    trig = Numeric.sum(triv, 1) / 3.
    trigz = trig[:, 2]  # triangle's center of gravity z value
    ind = Numeric.argsort(trigz)  # sorted indices

    if len(self.faceSet.faces.array):
        faces = Numeric.take(self.faceSet.faces.array, ind[::order])

        if self.shading == GL.GL_FLAT:
            # we also need to re-arrange the face normals
            if self.normals is None:
                normals = None
            elif len(self.normals) > 1:
                normals = Numeric.take(self.normals, ind[::order])
            else:
                normals = self.normals
        else:
            normals = None

        self.Set(faces=faces, fnormals=normals)
def rankData(n, inverse=False):
    """Return the ranks of a 1D Numeric array, in range 1...shape[0].

    The smallest value gets rank 1. With inverse=True the ranking is
    flipped, so the smallest value gets rank shape[0] and the largest 1.
    Input that is not one-dimensional fails the assertion.
    """
    values = Numeric.asarray(n)
    assert Numeric.rank(values) == 1
    count = values.shape[0]

    # Scatter 0..count-1 into the positions given by the sort order.
    ranks = Numeric.zeros(count, Numeric.Float)
    Numeric.put(ranks, Numeric.argsort(values), Numeric.arange(count))

    if not inverse:
        return ranks + 1
    return -1 * ranks + count
def patchAround(self, center, nAtoms):
    """
    patchAround( float_center, int_nAtoms ) -> mask for self.model

    Mark the nAtoms atoms with the smallest distance to center (as
    reported by self.__distances) in a 0/1 integer mask of length
    len(self.model), then return the result of self.centerPatch on
    that mask.
    """
    distances = self.__distances(center)
    nearest = N.argsort(distances)[:nAtoms]

    mask = N.zeros(len(self.model), 'i')
    N.put(mask, nearest, 1)

    return self.centerPatch(mask)
def get_min_distance_objid(self, objid, not_objid_set, sheets_only=False):
    """
    Get the sheet or helix with minimum distance from the supplied
    sheet or helix, specified by id (e.g. 'A' for a sheet or
    'HELIX_A_10' for a helix), skipping every id in not_objid_set so
    that this routine can be used iteratively to find only elements
    that have not already been found.

    Parameters:
       objid - sheet id (e.g. 'A') or helix id (e.g. 'HELIX_A_10') of
               the object to find the id of the closest object for.
       not_objid_set - set of objids that we do NOT want to find.
               Used so we can find the nearest element to an already
               positioned element that is not itself an already
               positioned element.
       sheets_only - (Default False) only find sheets, not helices.
               An id counts as a sheet when it is one character long.

    Uses data members (readonly):
       sheet_index_map
       reverse_sheet_index_map
       sheet_dist_matrix

    Return value:
       tuple (id, dist) where id (as per the objid parameter) is the
       closest sheet or helix to the specified one that is not in
       not_objid_set, and dist is that smallest distance.

    Raises:
       ValueError if no element satisfies the constraints (previously
       this surfaced as an UnboundLocalError at the return statement).
    """
    row = self.sheet_index_map[objid]

    # Candidate indices sorted (ascending) by their distance from the
    # target objid in the sheet dist matrix.
    # NB: use of argsort depends on having set diagonal (self distance)
    # elements to inf instead of 0 in calc_sse_dist_matrix().
    indices_sorted_by_dist = Numeric.argsort(self.sheet_dist_matrix[row])

    # The first (i.e. smallest distance) acceptable id wins.
    for candidate_index in indices_sorted_by_dist:
        candidate_objid = self.reverse_sheet_index_map[candidate_index]
        if candidate_objid in not_objid_set:
            continue
        if sheets_only and len(candidate_objid) != 1:
            continue
        return (candidate_objid,
                self.sheet_dist_matrix[row, candidate_index])

    raise ValueError("no element found near %r that is not excluded"
                     % (objid,))
def get_min_distance_objid(self, objid, not_objid_set, sheets_only=False):
    """
    Get the sheet or helix with minimum distance from the supplied
    sheet or helix, specified by id (e.g. 'A' for a sheet or
    'HELIX_A_10' for a helix), skipping every id in not_objid_set so
    that this routine can be used iteratively to find only elements
    that have not already been found.

    Parameters:
       objid - sheet id (e.g. 'A') or helix id (e.g. 'HELIX_A_10') of
               the object to find the id of the closest object for.
       not_objid_set - set of objids that we do NOT want to find.
               Used so we can find the nearest element to an already
               positioned element that is not itself an already
               positioned element.
       sheets_only - (Default False) only find sheets, not helices.
               An id counts as a sheet when it is one character long.

    Uses data members (readonly):
       sheet_index_map
       reverse_sheet_index_map
       sheet_dist_matrix

    Return value:
       tuple (id, dist) where id (as per the objid parameter) is the
       closest sheet or helix to the specified one that is not in
       not_objid_set, and dist is that smallest distance.

    Raises:
       ValueError if no element satisfies the constraints (previously
       this surfaced as an UnboundLocalError at the return statement).
    """
    row = self.sheet_index_map[objid]

    # Candidate indices sorted (ascending) by their distance from the
    # target objid in the sheet dist matrix.
    # NB: use of argsort depends on having set diagonal (self distance)
    # elements to inf instead of 0 in calc_sse_dist_matrix().
    indices_sorted_by_dist = Numeric.argsort(self.sheet_dist_matrix[row])

    # The first (i.e. smallest distance) acceptable id wins.
    for candidate_index in indices_sorted_by_dist:
        candidate_objid = self.reverse_sheet_index_map[candidate_index]
        if candidate_objid in not_objid_set:
            continue
        if sheets_only and len(candidate_objid) != 1:
            continue
        return (candidate_objid,
                self.sheet_dist_matrix[row, candidate_index])

    raise ValueError("no element found near %r that is not excluded"
                     % (objid,))
def orderCenters(self, points, origin=None):
    """
    Order random points by increasing distance to first or to origin.

    points - n x 3 array of float, random center coordinates
    origin - 3 array of float; defaults to points[0] when None

    -> the entries of points, re-ordered by increasing distance (as
       reported by self.__distances) from origin
    """
    if origin is None:
        origin = points[0]

    ranking = N.argsort(self.__distances(origin, points))
    return N.take(points, ranking)
def centerPatch(self, patch_mask):
    """
    patch_mask - [ 1|0 ], mask of non-centered patch

    -> [ 1|0 ], mask with as many positions set as patch_mask has
       non-zero entries, chosen as the positions with the smallest
       self.__distances value from the center that self.model.center
       returns for patch_mask
    """
    center = self.model.center(patch_mask)
    distances = self.__distances(center)

    patch_size = len(N.nonzero(patch_mask))
    closest = N.argsort(distances)[:patch_size]

    centered = N.zeros(len(patch_mask))
    N.put(centered, closest, 1)
    return centered
def pca(M):
    """Perform PCA on M; return (eigenvalues, eigenvectors), sorted.

    M is a T x N matrix. Eigenvalues are returned in descending order,
    and the eigenvector matrix is re-ordered to match and passed through
    t() before being returned.

    NOTE(review): t() and eigenvectors() come from the enclosing module;
    t is presumably the transpose -- confirm.
    """
    n_rows, n_cols = shape(M)

    if n_rows >= n_cols:
        # calculate covariance matrix
        covariance = 1. / n_rows * dot(t(M), M)
        evals, evecs = eigenvectors(covariance)
    else:
        # fewer rows than columns: use snapshot method
        gram = dot(M, t(M))
        evals, gram_evecs = eigenvectors(gram)
        # HACK: make sure evals are all positive
        evals = where(evals < 0, 0, evals)
        evecs = 1. / sqrt(evals) * dot(t(M), t(gram_evecs))

    # sort the eigenvalues and eigenvectors, descending order
    descending = argsort(evals)[::-1]
    sorted_evecs = take(evecs, descending, 1)
    sorted_evals = take(evals, descending)
    return sorted_evals, t(sorted_evecs)
def __find_intervals(self, l):
    """Group integers into runs of consecutive values.

    l is sorted ascending and cut wherever two neighbours differ by more
    than 1. Returns a list of tuples, one per run, in ascending order.
    The final run is always appended, so empty input yields [()].
    """
    l = N.array(l)
    l = N.take(l, N.argsort(l))

    # Positions i where l[i + 1] - l[i] > 1, i.e. the last element of
    # each run except the final one.
    break_points = N.nonzero(N.greater(l[1:] - l[:-1], 1))

    start = 0
    intervals = []
    for index in break_points:
        intervals.append(tuple(N.take(l, range(start, index + 1))))
        start = index + 1

    intervals.append(tuple(l[start:]))
    return intervals
def __find_intervals(self, l):
    """Group integers into runs of consecutive values.

    l is sorted ascending and cut wherever two neighbours differ by more
    than 1. Returns a list of tuples, one per run, in ascending order.
    The final run is always appended, so empty input yields [()].
    """
    l = N.array(l)
    l = N.take(l, N.argsort(l))

    # Positions i where l[i + 1] - l[i] > 1, i.e. the last element of
    # each run except the final one.
    break_points = N.nonzero(N.greater(l[1:] - l[:-1], 1))

    start = 0
    intervals = []
    for index in break_points:
        intervals.append(tuple(N.take(l, range(start, index + 1))))
        start = index + 1

    intervals.append(tuple(l[start:]))
    return intervals
def confidenceInterval(self, level):
    """
    confidenceInterval(self, level)

    @param level: confidence level (e.g. 0.68 for stdev interval)
    @type  level: float

    @return: start and end of the confidence interval(s)
             containing |level|*100 % of the probability, as a tuple
             of (start, end) pairs taken from self.x
    @rtype: ((float, float), ...)
    """
    # Indices of self.p from least to most probable.
    ranked = N.argsort(self.p).tolist()
    mass = N.add.accumulate(N.take(self.p, ranked)) * self.delta_x

    # Keep everything from the first point where the accumulated mass
    # reaches 1 - level.
    reached = N.nonzero(N.greater_equal(mass, 1. - level))
    selected = ranked[reached[0]:]

    runs = self.__find_intervals(selected)
    return tuple([(self.x[run[0]], self.x[run[-1]]) for run in runs])
def sortPoly(self, geom, vt, order=-1):
    """(faces, normals) <- sortPoly(geom, vt, order=-1)

    Sort the polygons of geom by the z value of each polygon's geometric
    center, computed from the vertex coordinates vt.

    Face indices are ordered by ascending center z and then stepped with
    [::order]: order=1 keeps the ascending order, order=-1 reverses it
    (documented originally as "furthest z first" for -1 and "closest z
    first" for 1).

    Returns copies of the re-ordered face array and of the face normals
    (geom.faceSet.normals * geom.GetMatrix()) in the same order. Returns
    None when vt is None or when the geometry has no faces; the latter
    used to fail with an UnboundLocalError.
    """
    # FIXME will not work with instance matrices
    if vt is None:
        return

    face_indices = geom.faceSet.faces.array
    if not len(face_indices):
        return

    triv = Numeric.take(vt, face_indices)
    trig = Numeric.sum(triv, 1) / 3.
    trigz = trig[:, 2]  # triangle's center of gravity z value
    ind = Numeric.argsort(trigz)[::order]  # sorted indices

    faces = Numeric.take(face_indices, ind)

    # The return value was never used by the original code either; the
    # call is kept in case it computes geom.faceSet.normals -- TODO confirm.
    geom.getFNormals()
    n = geom.faceSet.normals * geom.GetMatrix()
    normals = Numeric.take(n, ind)

    return faces.copy(), normals.copy()
def sortPoly(self, geom, vt, order=-1):
    """(faces, normals) <- sortPoly(geom, vt, order=-1)

    Sort the polygons of geom by the z value of each polygon's geometric
    center, computed from the vertex coordinates vt.

    Face indices are ordered by ascending center z and then stepped with
    [::order]: order=1 keeps the ascending order, order=-1 reverses it
    (documented originally as "furthest z first" for -1 and "closest z
    first" for 1).

    Returns copies of the re-ordered face array and of the face normals
    (geom.faceSet.normals * geom.GetMatrix()) in the same order. Returns
    None when vt is None or when the geometry has no faces; the latter
    used to fail with an UnboundLocalError.
    """
    # FIXME will not work with instance matrices
    if vt is None:
        return

    face_indices = geom.faceSet.faces.array
    if not len(face_indices):
        return

    triv = Numeric.take(vt, face_indices)
    trig = Numeric.sum(triv, 1) / 3.
    trigz = trig[:, 2]  # triangle's center of gravity z value
    ind = Numeric.argsort(trigz)[::order]  # sorted indices

    faces = Numeric.take(face_indices, ind)

    # The return value was never used by the original code either; the
    # call is kept in case it computes geom.faceSet.normals -- TODO confirm.
    geom.getFNormals()
    n = geom.faceSet.normals * geom.GetMatrix()
    normals = Numeric.take(n, ind)

    return faces.copy(), normals.copy()
def wavenumber_integration(tb, wl, response):
    """Return the response-weighted blackbody integral over wavenumber.

    tb       -- value passed to blackbody() as its second argument.
    wl       -- sequence of wavelengths; each is converted to the
                wavenumber 1 / wl[i].
    response -- sequence with the response at each wl[i].

    The integrand blackbody(1 / wl[i], tb, "wavenumber") * response[i] is
    integrated over the ascending wavenumber axis with integral() and
    divided by the integral of response over the same axis.

    The integrand and response are both re-ordered with the permutation
    that sorts the wavenumbers. Previously response was left in wavelength
    order, so the normalisation paired each response value with the wrong
    wavenumber whenever the input was not already sorted by wavenumber.
    """
    x = Numeric.array([1. / wavelength for wavelength in wl])
    y = Numeric.array([
        blackbody(1. / wavelength, tb, "wavenumber") * response[i]
        for i, wavelength in enumerate(wl)])

    # One permutation, applied to all three arrays, keeps them aligned.
    order = Numeric.argsort(x)
    x = Numeric.take(x, order)
    y = Numeric.take(y, order)
    response = Numeric.take(Numeric.array(response), order)

    res = integral(x, y)

    # Normalisation:
    norm = integral(x, response)
    return res / norm
def run(self):
    """Message loop: poll for bound messages and dispatch them.

    Until self.isStopped() is true, each pass:
      1. probes every (tid, message) key of self.getBindings() and, for
         each one that is pending, receives and unpacks it. The unpacked
         parameters must be a 2-tuple whose first item is used as the
         sort key (time stamp) and whose second item holds the arguments
         for the bound method (or None);
      2. for every tid, calls the bound methods in ascending time-stamp
         order. The sort is stable, so messages with equal stamps keep
         the order in which they were received;
      3. sleeps for self.getMessageLoopDelay() and then waits on the
         loop event, which blocks while the loop is paused.
    """
    import time

    while not self.isStopped():
        bindings = self.getBindings()

        ## loop through all messages and check for
        ## incoming events
        incoming = {}
        for tid, message in bindings.keys():
            if P.probe(tid, message):
                P.recv(tid, message)

                ## parameters must be tuple
                parameters = pvm.unpack()

                value = (message, parameters[0], parameters[1])
                incoming.setdefault(tid, []).append(value)

        ## for every incoming message call the bound method,
        ## oldest time stamp first
        for tid, values in incoming.items():
            ordered = sorted(values, key=lambda v: v[1])
            for message, _stamp, parameters in ordered:
                handler = bindings[(tid, message)]
                if parameters is None:
                    handler()
                else:
                    handler(*parameters)

        ## wait some time
        time.sleep(self.getMessageLoopDelay())

        ## if message-loop is stopped, wait until
        ## it is continued
        self.__loopEvent.wait()
def read(self, filename):
    """Load clusterings from a clustering file into self.clustering_dict.

    Example file with two tolerances:

        1.5 2.0 binding
        0 0 0 0 0.000 0.000 0.000 -14.645
        1 0 1 0 2.449 0.000 0.000 -14.636
        0 1 0 1 1.281 1.281 1.281 -14.424
        1 1 1 1 2.548 1.014 1.014 -14.210

    The first line lists the tolerances, optionally followed by a word
    starting with 'b', 'd' or 'e' naming the energy used for sorting
    ('binding' is assumed when it is missing).

    Every following line describes one conformation, in energy-sorted
    order: one (cluster_index, cluster_rank) integer pair per tolerance,
    then floats of which the first is stored as conf.refRMS and the
    second as conf.clRMS (the remaining floats are not read here).

    Raises RuntimeError if a tolerance from the file already has a
    clustering, and AssertionError if the file's energy type or its
    cluster numbering is inconsistent with the current state.
    """
    file_ptr = open(filename)
    try:
        lines = file_ptr.readlines()  # read the file
    finally:
        # close the handle even when reading fails
        file_ptr.close()

    d = self.clustering_dict  # same dict object, not a copy

    # see if the last word starts with 'b', 'd' or 'e' to denote energy
    header = lines[0]
    word_list = header.split()
    if word_list[-1][0] in ('b', 'd', 'e'):
        file_energy_used = word_list[-1]
        header = header[:header.find(file_energy_used)]  # strip energy word
    else:
        file_energy_used = 'binding'

    # make sure we're consistent with the argsort:
    # check whether self has done some clustering before this
    if len(self.clustering_dict) > 0:
        assert file_energy_used[0] == self.energy_used[0], \
            'Cluster energy mismatch'
    else:
        # here the dlg had no clustering in it
        self.energy_used = file_energy_used
        # redo argsort with file_energy_used
        if file_energy_used[0] == 'd':
            energy_list = [conf.docking_energy for conf in self.data]
        else:
            energy_list = [conf.binding_energy for conf in self.data]
        # sort the conformations by energy
        self.argsort = Numeric.argsort(energy_list)

    # t_list for the example is [1.5, 2.0]
    t_list = [float(word) for word in header.split()]
    num_t = len(t_list)  # number of clusterings

    for tolerance in t_list:
        # initialize the keys
        if tolerance in d:
            raise RuntimeError("overwriting existing clustering")
        c = d[tolerance] = Clustering()
        c.tolerance = tolerance

    # cx is the rank of the conformation in energy order; self.argsort
    # maps it to the index into self.data
    for cx, l in enumerate(lines[1:]):
        ll = l.split()
        # eg: 2 tolerances gives
        # [0, 0, 0, 0, 0.000, 0.000, 0.000, -14.645]
        c_list = [int(word) for word in ll[:num_t * 2]]
        data_list = [float(word) for word in ll[num_t * 2:]]

        for i, t in enumerate(t_list[:len(c_list) // 2]):
            cluster_index = c_list[2 * i]
            cluster_rank = c_list[2 * i + 1]
            conf = self.data[int(self.argsort[cx])]
            if cluster_rank == 0:
                # rank 0 starts a new cluster, which must be the next one
                assert len(d[t]) == cluster_index
                d[t].append(Cluster(conf))
            else:
                # add conformation to its cluster
                assert len(d[t][cluster_index]) == cluster_rank
                d[t][cluster_index].append(conf)
            # tell the conformation what cluster(s) it belongs to...
            conf.cluster_dict[t] = (cluster_index, cluster_rank)
            conf.refRMS = data_list[0]
            conf.clRMS = data_list[1]
def read(self, filename):
    """Load clusterings from a clustering file into self.clustering_dict.

    Example file with two tolerances:

        1.5 2.0 binding
        0 0 0 0 0.000 0.000 0.000 -14.645
        1 0 1 0 2.449 0.000 0.000 -14.636
        0 1 0 1 1.281 1.281 1.281 -14.424
        1 1 1 1 2.548 1.014 1.014 -14.210

    The first line lists the tolerances, optionally followed by a word
    starting with 'b', 'd' or 'e' naming the energy used for sorting
    ('binding' is assumed when it is missing).

    Every following line describes one conformation, in energy-sorted
    order: one (cluster_index, cluster_rank) integer pair per tolerance,
    then floats of which the first is stored as conf.refRMS and the
    second as conf.clRMS (the remaining floats are not read here).

    Raises RuntimeError if a tolerance from the file already has a
    clustering, and AssertionError if the file's energy type or its
    cluster numbering is inconsistent with the current state.
    """
    file_ptr = open(filename)
    try:
        lines = file_ptr.readlines()  # read the file
    finally:
        # close the handle even when reading fails
        file_ptr.close()

    d = self.clustering_dict  # same dict object, not a copy

    # see if the last word starts with 'b', 'd' or 'e' to denote energy
    header = lines[0]
    word_list = header.split()
    if word_list[-1][0] in ('b', 'd', 'e'):
        file_energy_used = word_list[-1]
        header = header[:header.find(file_energy_used)]  # strip energy word
    else:
        file_energy_used = 'binding'

    # make sure we're consistent with the argsort:
    # check whether self has done some clustering before this
    if len(self.clustering_dict) > 0:
        assert file_energy_used[0] == self.energy_used[0], \
            'Cluster energy mismatch'
    else:
        # here the dlg had no clustering in it
        self.energy_used = file_energy_used
        # redo argsort with file_energy_used
        if file_energy_used[0] == 'd':
            energy_list = [conf.docking_energy for conf in self.data]
        else:
            energy_list = [conf.binding_energy for conf in self.data]
        # sort the conformations by energy
        self.argsort = Numeric.argsort(energy_list)

    # t_list for the example is [1.5, 2.0]
    t_list = [float(word) for word in header.split()]
    num_t = len(t_list)  # number of clusterings

    for tolerance in t_list:
        # initialize the keys
        if tolerance in d:
            raise RuntimeError("overwriting existing clustering")
        c = d[tolerance] = Clustering()
        c.tolerance = tolerance

    # cx is the rank of the conformation in energy order; self.argsort
    # maps it to the index into self.data
    for cx, l in enumerate(lines[1:]):
        ll = l.split()
        # eg: 2 tolerances gives
        # [0, 0, 0, 0, 0.000, 0.000, 0.000, -14.645]
        c_list = [int(word) for word in ll[:num_t * 2]]
        data_list = [float(word) for word in ll[num_t * 2:]]

        for i, t in enumerate(t_list[:len(c_list) // 2]):
            cluster_index = c_list[2 * i]
            cluster_rank = c_list[2 * i + 1]
            conf = self.data[int(self.argsort[cx])]
            if cluster_rank == 0:
                # rank 0 starts a new cluster, which must be the next one
                assert len(d[t]) == cluster_index
                d[t].append(Cluster(conf))
            else:
                # add conformation to its cluster
                assert len(d[t][cluster_index]) == cluster_rank
                d[t][cluster_index].append(conf)
            # tell the conformation what cluster(s) it belongs to...
            conf.cluster_dict[t] = (cluster_index, cluster_rank)
            conf.refRMS = data_list[0]
            conf.clRMS = data_list[1]
def process_data(data, referencing=None, n_points=200, exposure_cutoff=0.05, atom_type='H', exclude_entries=(),molType='protein'): stats = process_secondary(data, referencing, n_points, atom_type, exclude_entries=exclude_entries,molType=molType) ## decompose with respect to accessibility S = {} bounds = {} for key, values in stats.items(): shifts, exposure = values ind = Numeric.argsort(exposure) shifts = Numeric.take(shifts, ind) exposure = Numeric.take(exposure, ind) B = [] mask = Numeric.less(exposure, exposure_cutoff) ind = Numeric.nonzero(mask) if len(shifts)-len(ind) < n_points: ind = range(len(shifts)) if len(ind) >= n_points: print key, 0, len(ind), len(shifts) S[key + (0,)] = (Numeric.take(shifts, ind), Numeric.take(exposure, ind)) B.append(exposure[len(ind)-1]) shifts = shifts[len(ind):] exposure = exposure[len(ind):] i = 1 else: i = 0 n_classes = len(exposure) / int(n_points) n = int(len(exposure) / float(max(1, n_classes))) + 1 while len(shifts) > n: print key, i, len(shifts[:n]), len(shifts) S[key + (i,)] = shifts[:n], exposure[:n] B.append(exposure[n]) shifts = shifts[n:] exposure = exposure[n:] i += 1 if len(shifts): print key, i, len(shifts) S[key + (i,)] = shifts, exposure B.append(exposure[-1]) bounds[key] = B return S, bounds