#---standard imports; helpers such as basic_compute_loop, compute_rdf_imaged, and
#---...status, plus shared globals, are provided by the surrounding analysis codebase
import itertools
import time
import numpy as np

def lipid_rdfs_detail(**kwargs):
    """
    Compute 2D lipid radial distribution functions.
    """
    dat = kwargs['upstream']['lipid_abstractor']
    imono = dat['monolayer_indices']
    points = dat['points']
    resnames = dat['resnames']
    vecs = dat['vecs']
    attrs, result = {}, {}
    #---global scanrange from specs to reduce the distances data
    cutoff = kwargs['calc']['specs']['cutoff']
    binsize = kwargs['calc']['specs']['binsize']
    scanrange = np.arange(0, cutoff, binsize)
    #---loop over monolayers
    for mn in range(2):
        #---prepare pairs
        resnames_u = np.unique(resnames[np.where(imono == mn)])
        pairs = ([([r], [r]) for r in resnames_u] +
            [([i], [j]) for i, j in itertools.combinations(resnames_u, 2)] +
            [(resnames_u, resnames_u)])
        pairnames = ([(i[0], j[0]) for i, j in pairs[:-1]] +
            [('all lipids', 'all lipids')])
        pairspec = dict(pairs=pairs, pairnames=pairnames, groups={})
        #---get XY coordinates for this leaflet
        coords = points[:, np.where(imono == mn)[0], :2]
        #---tabulate rows for each group
        for pair, pairname in zip(pairspec['pairs'], pairspec['pairnames']):
            group_1 = np.where(np.in1d(resnames[np.where(imono == mn)], pair[0]))[0]
            group_2 = np.where(np.in1d(resnames[np.where(imono == mn)], pair[1]))[0]
            pairspec['groups'][pairname] = (group_1, group_2)
        nframes = len(coords)
        looper = [dict(vec=vecs[fr], coords=coords[fr], bins=scanrange,
            groups=pairspec['groups']) for fr in range(nframes)]
        incoming = np.array(basic_compute_loop(compute_rdf_imaged, looper))
        #---! note that we no longer take the mean over axis=0 here, compared to
        #---! ...lipid_rdfs, which allows us to later check the convergence of the RDFs
        obs = dict([(pair, np.array([i[pair] for i in incoming]))
            for pair in pairspec['pairnames']])
        #---package
        tag = '_mn%s' % mn
        attrs['cutoff'] = cutoff
        attrs['binsize'] = binsize
        attrs['pairs' + tag] = [[tuple(j) for j in i] for i in pairspec['pairs']]
        attrs['pairnames' + tag] = [tuple(i) for i in np.array(pairnames)]
        #---save in sequence named by pairnames
        for pairname in attrs['pairnames' + tag]:
            result['counts' + tag + '_%s:%s' % tuple(pairname)] = obs[pairname]
    result['resnames'] = dat['resnames']
    result['monolayer_indices'] = dat['monolayer_indices']
    #---mean box area in the XY plane
    result['total_area'] = np.prod(vecs.mean(axis=0)[:2])
    return result, attrs
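
#---compute_rdf_imaged (the per-frame worker above) is defined elsewhere in this
#---...codebase. The following is a minimal sketch of the assumed interface, not the
#---...original implementation: it histograms XY pairwise distances under the
#---...minimum-image convention for each named group pair (the real routine may
#---...handle periodic images differently)
def compute_rdf_imaged_sketch(vec, coords, bins, groups):
    """Hypothetical stand-in: per-frame distance counts for each group pair."""
    counts = {}
    for pairname, (group_1, group_2) in groups.items():
        #---pairwise displacements in the XY plane
        disps = coords[group_1][:, None, :] - coords[group_2][None, :, :]
        #---minimum-image convention, assuming an orthorhombic box
        disps -= vec[:2] * np.round(disps / vec[:2])
        dists = np.linalg.norm(disps, axis=-1)
        #---histogram over the scanrange bins, discarding zero self-distances
        counts[pairname], _ = np.histogram(dists[dists > 0], bins=bins)
    return counts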
def hydrogen_bond_compactor():
    """Compact the hydrogen bonds into the same pairwise form as the contact maps."""
    global data_hbonds, bonds, obs
    for sn in sns:
        #---custom mapping for collecting hydrogen bonds
        bonds, obs = [data_hbonds[sn]['data'][i] for i in ['bonds', 'observations']]
        #---! get resids for the protein and lipid_resnames from contact maps
        lipid_resnames = np.unique(
            data_contacts[sn]['data']['bonds'][:, rowspec.index('target_resname')])
        resids = data_contacts[sn]['data']['subject_residues_resids']
        resname_combos = ([(r, np.array([r])) for r in lipid_resnames] +
            [('all lipids', np.array(lipid_resnames))])
        #---compute loop
        looper = [{'resid': resid, 'resname_set': resname_set}
            for resid in resids
            for resname_name, resname_set in resname_combos]
        compute_function = bond_counter
        incoming = basic_compute_loop(compute_function, looper, run_parallel=True)
        #---tack on the compacted data to mimic the form of the contact maps
        data_hbonds[sn]['data']['hbonds_compacted'] = np.array(incoming)
        data_hbonds[sn]['data']['pairs_resid_resname'] = np.array([
            (resid, resname_name) for resid in resids
            for resname_name, resname_set in resname_combos]).astype(str)
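
#---bond_counter is supplied elsewhere in this module; a minimal sketch of its
#---...assumed behavior follows. It reads the global bonds/obs tables set above and
#---...returns, for one protein resid and one set of lipid resnames, the number of
#---...observed bonds per frame. Only 'target_resname' appears in the original code;
#---...the 'subject_resid' column name is an assumption here.
def bond_counter_sketch(resid, resname_set):
    """Hypothetical stand-in: count bonds per frame for one resid/resname set."""
    #---rows where the subject resid and target resname both match
    which = np.where(np.all((
        bonds[:, rowspec.index('subject_resid')].astype(int) == int(resid),
        np.in1d(bonds[:, rowspec.index('target_resname')], resname_set)), axis=0))[0]
    #---sum the observation matrix (frames by bonds) over the matching bonds
    return obs[:, which].sum(axis=1)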
def measure_normal_deviation_from_wavy_surface(heights, vecs, curvilinear=False):
    """
    Given heights on a regular grid, compute the average surface and then measure
    the deviation of each frame from that average along the surface normals.
    """
    global surf, surfs, mesh
    do_inflate = False
    inflate_factor = 10
    surfs = heights
    #---average surface
    surf_average_base = surfs.mean(axis=0)
    if do_inflate:
        surf_average = inflate_lateral(surf_average_base, inflate_factor)
    else:
        surf_average = surf_average_base
    #---height of the average surface
    pivot = surf_average.mean()
    #---standardized box vectors for all calculations (see notes above)
    mvec_base = vecs.mean(axis=0)
    #---get height fluctuations to set the half box height
    maxflux = surfs.ptp() * 1.1 / 2.
    #---new standard box vectors have the correct height and inflated XY dimensions
    inflate_factors = (np.array(surf_average.shape).astype(float) /
        np.array(surf_average_base.shape))
    #---use globals for parallel computation
    if do_inflate:
        mvec = np.array([mvec_base[0] * inflate_factors[0],
            mvec_base[1] * inflate_factors[1], maxflux * 2.])
    else:
        mvec = np.array([mvec_base[0], mvec_base[1], maxflux * 2.])
    #---compute a reference surface in absolute points
    #---...using the vertical center so that all heights are shifted to the center
    #---...of the new box, whose height is twice maxflux
    surf = boxstuff(height_recenter(literalize(surf_average, mvec),
        pivot=pivot, maxflux=maxflux), mvec)
    #---make the reference mesh (slow step)
    status('making mesh (curvilinear=%s)' % curvilinear, tag='compute')
    mesh = makemesh(surf, mvec, curvilinear=curvilinear)
    status('mesh is ready', tag='compute')
    looper = [dict(fr=fr, pivot=pivot, mvec=mvec, maxflux=maxflux)
        for fr in range(len(surfs))]
    incoming = basic_compute_loop(average_normal_projections,
        looper=looper, run_parallel=True)
    #---reshape and concatenate the points
    return np.reshape(incoming, (-1,) + surf_average.shape)
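
#---basic_compute_loop appears throughout this module: it maps a worker function over
#---...a list of keyword-argument dicts, optionally in parallel. A minimal sketch of
#---...the assumed semantics follows (the real helper lives elsewhere in the codebase
#---...and may use a different parallelism backend); workers here read module globals,
#---...which requires a fork-based multiprocessing start method
def basic_compute_loop_sketch(compute_function, looper, run_parallel=True):
    """Hypothetical stand-in: map compute_function over kwargs dicts."""
    if not run_parallel:
        return [compute_function(**kwargs) for kwargs in looper]
    import multiprocessing
    with multiprocessing.Pool() as pool:
        #---dispatch one task per kwargs dict and collect results in order
        results = [pool.apply_async(compute_function, kwds=kwargs)
            for kwargs in looper]
        return [r.get() for r in results]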
def salt_bridge_filter():
    """Filter the contact maps down to salt bridges, then count bonds per residue."""
    global data_contacts, bonds, obs, valid_salt_bridges
    for sn in sns:
        #---filter the bonds and observations from contact maps
        bonds_all = data_contacts[sn]['data']['bonds']
        obs_all = data_contacts[sn]['data']['observations']
        nframes = len(obs_all)
        salt_bridge_inds = []
        #---loop over frames in the simulation
        for fr in range(nframes):
            status('filtering salt bridges from contact data',
                i=fr, looplen=nframes, tag='compute')
            #---find observed bonds for that frame
            bonds_inds = np.where(obs_all[fr] == 1.0)[0]
            frame = bonds_all[bonds_inds]
            hits_over_salt_bridges = []
            for definition in valid_salt_bridges:
                matches_resname = frame[:, 0] == definition['resname']
                matches_atom = np.in1d(frame[:, 2], definition['atoms'])
                matches_lipid_oxygen = np.array([i[0] for i in frame[:, 5]]) == 'O'
                matches = np.all((matches_resname, matches_atom,
                    matches_lipid_oxygen), axis=0)
                hits_over_salt_bridges.append(matches)
            frame_matches = np.where(np.any(hits_over_salt_bridges, axis=0))
            #---save the observed salt bridges by index number for the master bond list
            salt_bridge_inds.append(bonds_inds[frame_matches])
        #---get unique indices for the observed salt bridges
        salt_inds = np.unique(np.concatenate(salt_bridge_inds))
        #---set global bonds and obs so they only contain salt bridges,
        #---...then run the bond_counter
        bonds = bonds_all[salt_inds]
        obs = obs_all[:, salt_inds]
        status('%s has %d salt bridges' % (sn, len(salt_inds)), tag='compute')
        #---! get resids for the protein and lipid_resnames from contact maps
        lipid_resnames = np.unique(
            data_contacts[sn]['data']['bonds'][:, rowspec.index('target_resname')])
        resids = data_contacts[sn]['data']['subject_residues_resids']
        resname_combos = ([(r, np.array([r])) for r in lipid_resnames] +
            [('all lipids', np.array(lipid_resnames))])
        #---compute loop
        looper = [{'resid': resid, 'resname_set': resname_set}
            for resid in resids
            for resname_name, resname_set in resname_combos]
        compute_function = bond_counter
        incoming = basic_compute_loop(compute_function, looper, run_parallel=True)
        #---tack on the compacted data to mimic the form of the contact maps
        data_contacts[sn]['data']['salt_compacted'] = np.array(incoming)
        if False:
            data_contacts[sn]['data']['pairs_resid_resname'] = np.array([
                (resid, resname_name) for resid in resids
                for resname_name, resname_set in resname_combos]).astype(str)
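
#---valid_salt_bridges is a global list of dicts with 'resname' and 'atoms' keys, as
#---...read by the filter above. An illustrative example for the basic residues using
#---...standard PDB atom names (the actual definitions are set elsewhere in the
#---...workflow and may differ):
valid_salt_bridges_example = [
    dict(resname='LYS', atoms=['NZ']),
    dict(resname='ARG', atoms=['NH1', 'NH2']),
    dict(resname='HIS', atoms=['ND1', 'NE2']),
]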
def compute_curvature_distributions():
    """Compute curvature distributions."""
    spacing = work.plots['lipid_mesh'].get('specs', {}).get('curvature_map_spacing', 2.0)
    global data
    survey = {}
    for snum, sn in enumerate(work.sns()):
        dat = data[sn]['data']
        nframes = int(dat['nframes'])
        def get(mn, fr, name):
            return dat['%d.%d.%s' % (mn, fr, name)]
        ngrid = np.round(data[sn]['data']['vecs'].mean(axis=0)[:2] / spacing).astype(int)
        nmols = [int(dat['%d.1.nmol' % mn]) for mn in range(2)]
        start = time.time()
        survey[sn] = np.zeros(ngrid)
        for mn in range(2):
            #---formulate X, Y, curvature points
            xy = np.array([get(mn, fr, 'points')[:nmols[mn], :2]
                for fr in range(nframes)])
            curvatures = np.array([get(mn, fr, 'mean') for fr in range(nframes)])
            global raw, fine
            raw = np.concatenate((np.transpose(xy, (2, 0, 1)),
                np.reshape(curvatures, (1, nframes, -1)))).transpose(1, 2, 0)
            #---location of grid points on the unit square
            prop_pts = np.transpose(np.meshgrid(range(0, ngrid[0]),
                range(0, ngrid[1]))) / np.array(ngrid).astype(float)
            #---scale the unit-square points by the box vectors to get grid points
            #---...for every frame
            fine = (np.tile(np.reshape(prop_pts, (ngrid[0], ngrid[1], 1, 2)),
                (nframes, 1)) * dat['vecs'][:, :2]).transpose((2, 0, 1, 3))
            interp_accumulate = basic_compute_loop(interpolate_curvatures,
                [dict(fr=fr) for fr in range(nframes)])
            survey[sn] += np.sum(interp_accumulate, axis=0)
        #---average over frames and both monolayers
        survey[sn] = survey[sn] / (2. * nframes)
    return survey
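
#---interpolate_curvatures is the per-frame worker used above; it reads the globals
#---...raw (frames by particles by [x, y, curvature]) and fine (frames by grid by 2).
#---...A minimal sketch using nearest-neighbor interpolation follows (the original
#---...may use a different interpolation scheme):
def interpolate_curvatures_sketch(fr):
    """Hypothetical stand-in: interpolate curvature onto the frame's grid."""
    import scipy.interpolate
    pts, vals = raw[fr][:, :2], raw[fr][:, 2]
    #---evaluate curvature at each grid point and restore the 2D grid shape
    return scipy.interpolate.griddata(pts, vals, fine[fr].reshape(-1, 2),
        method='nearest').reshape(fine[fr].shape[:2])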
def drop_gaussians(self, **kwargs):
    """
    Method for choosing the positions of Gaussians.
    """
    pos_spec = kwargs.get('curvature_positions', {})
    method = pos_spec.get('method', None)
    extent = kwargs.get('extents', {}).get('extent', {})
    if method == 'protein_subselection':
        for sn in self.sns:
            selections = pos_spec.get('selections', None)
            if not selections:
                raise Exception('need selections in protein_subselection')
            #---determine the centers of the protein according to the selections
            #---...noting that the protein_abstractor points are stored by residue,
            #---...not by bead/atom
            points = np.array([np.transpose(
                self.data_prot[sn]['data']['points'], (1, 0, 2))[s]
                for s in selections])
            points = points.mean(axis=1)[..., :2]
            #---save the points for later
            self.memory[(sn, 'drop_gaussians_points')] = points
            ndrops = len(points)
            #---get data from the memory
            hqs = self.memory[(sn, 'hqs')]
            self.nframes = len(hqs)
            mn = hqs.shape[1:]
            vecs = self.memory[(sn, 'vecs')]
            vecs_mean = np.mean(vecs, axis=0)
            #---formulate the curvature request
            curvature_request = dict(curvature=1.0, mn=mn,
                sigma_a=extent, sigma_b=extent, theta=0.0)
            #---construct unity fields
            fields_unity = np.zeros((self.nframes, ndrops, mn[0], mn[1]))
            reindex, looper = zip(*[((fr, ndrop), dict(
                vecs=vecs[fr], centers=[points[ndrop][fr] / vecs[fr][:2]],
                **curvature_request))
                for fr in range(self.nframes) for ndrop in range(ndrops)])
            status('computing curvature fields for %s' % sn, tag='compute')
            incoming = basic_compute_loop(make_fields, looper=looper)
            #---! inelegant
            for ii, (fr, ndrop) in enumerate(reindex):
                fields_unity[fr][ndrop] = incoming[ii]
            self.memory[(sn, 'fields_unity')] = fields_unity
    elif method == 'pixel':
        #---recall that the loop over sns is pretty much redundant
        for sn in self.sns:
            #---construct a box-vector-scaled grid of points which we call "pixels"
            #---get data from the memory
            hqs = self.memory[(sn, 'hqs')]
            self.nframes = len(hqs)
            mn = hqs.shape[1:]
            vecs = self.memory[(sn, 'vecs')]
            vecs_mean = np.mean(vecs, axis=0)
            #---get the grid spacing from the metadata
            spacer = pos_spec.get('spacer', None)
            spacer_x, spacer_y = [pos_spec.get('spacer_%s' % i, spacer) for i in 'xy']
            npts = (vecs_mean[:2] / np.array([spacer_x, spacer_y])).astype(int)
            posts = np.array([[np.linspace(0, vecs[fr][d], npts[d] + 1)
                for d in range(2)] for fr in range(self.nframes)])
            fence = np.array([[(posts[fr][d][1:] + posts[fr][d][:-1]) / 2.
                for d in range(2)] for fr in range(self.nframes)])
            points = np.array([np.concatenate(np.transpose(np.meshgrid(*fence[fr])))
                for fr in range(self.nframes)])
            ndrops = len(points[0])
            #---formulate the curvature request
            curvature_request = dict(curvature=1.0, mn=mn,
                sigma_a=extent, sigma_b=extent, theta=0.0)
            #---construct unity fields
            fields_unity = np.zeros((self.nframes, ndrops, mn[0], mn[1]))
            reindex, looper = zip(*[((fr, ndrop), dict(
                vecs=vecs[fr], centers=[points[fr][ndrop] / vecs[fr][:2]],
                **curvature_request))
                for fr in range(self.nframes) for ndrop in range(ndrops)])
            status('computing curvature fields for %s' % sn, tag='compute')
            incoming = basic_compute_loop(make_fields, looper=looper)
            #---! inelegant
            for ii, (fr, ndrop) in enumerate(reindex):
                fields_unity[fr][ndrop] = incoming[ii]
            self.memory[(sn, 'fields_unity')] = fields_unity
            self.memory[(sn, 'drop_gaussians_points')] = points
    elif method == 'neighborhood':
        #---the extra distance defines a border around the average hull
        extra_distance = pos_spec['distance_cutoff']
        spacer = pos_spec['spacer']
        def rotate2d(pts, angle):
            x = pts[:, 0] * np.cos(angle) - pts[:, 1] * np.sin(angle)
            y = pts[:, 1] * np.cos(angle) + pts[:, 0] * np.sin(angle)
            return np.transpose((x, y))
        def arange_symmetric(a, b, c):
            return np.unique(np.concatenate((
                np.arange(a, b, c), -1 * np.arange(a, b, c))))
        for sn in self.sns:
            #---! an earlier indexing error here has been fixed: each protein now gets
            #---! ...its own neighborhood, presumably with an indeterminate position
            #---for each frame we compute the centroid and orientation
            points_all = self.data_prot[sn]['data']['points_all']
            cogs = points_all.mean(axis=2).mean(axis=1)[:, :2]
            #---get the average set of points
            average_pts = points_all.mean(axis=0).mean(axis=0)[:, :2]
            average_pts -= average_pts.mean(axis=0)
            average_axis = principal_axis(average_pts)
            angle = np.arccos(np.dot(vecnorm(average_axis), [1.0, 0.0]))
            direction = 1.0 - 2.0 * (np.cross(vecnorm(average_axis), [1.0, 0.0]) < 0)
            rot = rotate2d(average_pts, direction * angle)
            #---get the span of the points plus the extra distance in each direction
            span_x, span_y = np.abs(rot).max(axis=0) + extra_distance
            ref_grid = np.concatenate(np.transpose(np.meshgrid(
                arange_symmetric(0, span_x, spacer),
                arange_symmetric(0, span_y, spacer))))
            vecs = self.memory[(sn, 'vecs')]
            vecs_mean = np.mean(vecs, axis=0)
            #---for each frame, map the ref_grid onto the principal axis
            self.nframes = len(points_all)
            points = np.zeros((len(ref_grid), self.nframes, 2))
            for fr in range(self.nframes):
                pts = points_all[fr].mean(axis=0)[:, :2]
                offset = pts.mean(axis=0)
                average_axis = principal_axis(pts - offset)
                angle = np.arccos(np.dot(vecnorm(average_axis), [1.0, 0.0]))
                direction = 1.0 - 2.0 * (np.cross(
                    vecnorm(average_axis), [1.0, 0.0]) < 0)
                ref_grid_rot = rotate2d(ref_grid, direction * angle) + offset
                #---handle PBCs by putting everything back in the box
                ref_grid_rot_in_box = (ref_grid_rot +
                    (ref_grid_rot < 0) * vecs[fr, :2] -
                    (ref_grid_rot >= vecs[fr, :2]) * vecs[fr, :2])
                points[:, fr] = ref_grid_rot_in_box
                #---debug with a plot if desired
                if False:
                    import matplotlib.pyplot as plt
                    plt.scatter(*ref_grid_rot_in_box.T)
                    from base.store import picturesave
                    fn = 'fig.DEBUG.curvature_undulation_coupling_neighborhood'
                    picturesave(fn, self.work.plotdir)
                    raise Exception(
                        'dropped a debugging image for your review and deletion '
                        'at %s; remove it and turn this debugger off to continue' % fn)
            #---save the position of the curvature fields for later
            self.memory[(sn, 'drop_gaussians_points')] = points
            ndrops = len(ref_grid)
            #---! this block repeats the other methods
            #---get data from the memory
            hqs = self.memory[(sn, 'hqs')]
            mn = hqs.shape[1:]
            #---formulate the curvature request
            curvature_request = dict(curvature=1.0, mn=mn,
                sigma_a=extent, sigma_b=extent, theta=0.0)
            #---construct unity fields
            fields_unity = np.zeros((self.nframes, ndrops, mn[0], mn[1]))
            reindex, looper = zip(*[((fr, ndrop), dict(
                vecs=vecs[fr], centers=[points[ndrop][fr] / vecs[fr][:2]],
                **curvature_request))
                for fr in range(self.nframes) for ndrop in range(ndrops)])
            status('computing curvature fields for %s' % sn, tag='compute')
            incoming = basic_compute_loop(make_fields, looper=looper,
                run_parallel=True)
            #---! inelegant
            for ii, (fr, ndrop) in enumerate(reindex):
                fields_unity[fr][ndrop] = incoming[ii]
            self.memory[(sn, 'fields_unity')] = fields_unity
    #---one field per protein, for all proteins
    elif method in ['protein_dynamic_single', 'protein_dynamic_single_uniform']:
        #---! the following code repeats the protein_subselection method
        for sn in self.sns:
            #---points_all is nframes by proteins by beads/atoms by XYZ
            points = self.data_prot[sn]['data']['points_all'].mean(
                axis=2)[..., :2].transpose(1, 0, 2)
            #---save the points for later
            self.memory[(sn, 'drop_gaussians_points')] = points
            ndrops = len(points)
            #---get data from the memory
            hqs = self.memory[(sn, 'hqs')]
            self.nframes = len(hqs)
            mn = hqs.shape[1:]
            vecs = self.memory[(sn, 'vecs')]
            vecs_mean = np.mean(vecs, axis=0)
            #---formulate the curvature request
            curvature_request = dict(curvature=1.0, mn=mn,
                sigma_a=extent, sigma_b=extent, theta=0.0)
            #---construct unity fields
            fields_unity = np.zeros((self.nframes, ndrops, mn[0], mn[1]))
            reindex, looper = zip(*[((fr, ndrop), dict(
                vecs=vecs[fr], centers=[points[ndrop][fr] / vecs[fr][:2]],
                **curvature_request))
                for fr in range(self.nframes) for ndrop in range(ndrops)])
            status('computing curvature fields for %s' % sn, tag='compute')
            incoming = basic_compute_loop(make_fields, looper=looper)
            #---! inelegant
            for ii, (fr, ndrop) in enumerate(reindex):
                fields_unity[fr][ndrop] = incoming[ii]
            self.memory[(sn, 'fields_unity')] = fields_unity
    else:
        raise Exception('invalid selection method')
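
#---make_fields builds one curvature field per looper entry from the curvature
#---...request above. A minimal sketch follows, assuming a sum of 2D Gaussians on the
#---...mn grid with centers given as fractions of the box vectors; the original may
#---...additionally support rotation via theta (ignored in this isotropic sketch) and
#---...a different periodic treatment. Keyword names follow the curvature_request dict.
def make_fields_sketch(vecs, centers, curvature, mn, sigma_a, sigma_b, theta=0.0):
    """Hypothetical stand-in: sum of periodic 2D Gaussians on a regular grid."""
    #---grid points in box coordinates
    xs, ys = [np.linspace(0, vecs[d], mn[d], endpoint=False) for d in range(2)]
    X, Y = np.meshgrid(xs, ys, indexing='ij')
    field = np.zeros(mn)
    for cx, cy in centers:
        #---centers arrive as fractions of the box vectors
        dx, dy = X - cx * vecs[0], Y - cy * vecs[1]
        #---minimum-image the displacements so each Gaussian wraps the periodic box
        dx -= vecs[0] * np.round(dx / vecs[0])
        dy -= vecs[1] * np.round(dy / vecs[1])
        #---theta is accepted but ignored here; the sketch is axis-aligned
        field += curvature * np.exp(-(dx**2 / (2. * sigma_a**2) +
            dy**2 / (2. * sigma_b**2)))
    return field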