def reload():
    """Load everything for the plot only once."""
    #---canonical globals list
    #---!? can this be made programmatic?
    global sns,scanrange,distributions,distances,data,calc,normalizers,middles
    #---reload sequence goes here
    data,calc = plotload(plotname)
    sns = work.specs['collections']['position']+['membrane-v538']
    #---compute distance distributions
    cutoff = max([data[sn]['data']['water_distances'].max() for sn in sns])
    #---globals for parallel functions
    scanrange = np.arange(0,cutoff,binsize)
    #---distances are indexed by concatenated frames, then ions
    distances = dict([(sn,np.concatenate(data[sn]['data']['water_distances'])) for sn in sns])
    #---parallel compute
    looper = [dict(index=i,sn=sn) for sn in sns for i in range(len(distances[sn]))]
    incoming = np.array(basic_compute_loop(histogram_stack,looper=looper))
    distributions = dict([(sn,np.array([incoming[ii] for ii,i in enumerate(looper)
        if i['sn']==sn])) for sn in sns])
    #---normalization factors
    middles = (scanrange[1:]+scanrange[:-1])/2
    areas = np.array([4*np.pi*binsize*middles[i]**2 for i in range(len(middles))])
    #---atoms in a nm3: (1000g/18g is mol/L) * 1L/1000ml * 1ml/cm3 / (10**7 nm/cm)**3 = 33.46
    water_density = 6.023*10**23*(1000.0/18)/1000/(10**(9-2))**3
    #---window to estimate bulk, lower than the dropoff, higher than the first two shells
    bulk_window_raw = (0.75,1.0)
    #---normalize the ion-water density for all RDF measurements
    #---note that this is repeated below for zone-specific normalizations
    normalizers = {}
    for sn in sns:
        bulk_window = np.where(np.all((scanrange>=bulk_window_raw[0],
            scanrange<=bulk_window_raw[1]),axis=0))[0]
        #---reestimate ion-water pseudo-density at supposed bulk-like distances
        water_density = (distributions[sn][slice(None,None),bulk_window]/areas[bulk_window]).mean()
        normalizers[sn] = areas*water_density
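#---the two helpers used above are defined elsewhere in the codebase; the following is a
#---minimal sketch only, assuming basic_compute_loop is a thin joblib wrapper over a list
#---of keyword-argument dicts (as in the legacy serial/parallel blocks further down) and
#---histogram_stack bins one row of the global distances array against the shared
#---scanrange edges; neither is the canonical implementation
def basic_compute_loop(compute_function,looper,run_parallel=True,debug=False):
    """Apply compute_function to each kwargs dict in looper, optionally in parallel (sketch)."""
    if run_parallel:
        from joblib import Parallel,delayed
        return Parallel(n_jobs=8,verbose=10 if debug else 0)(
            delayed(compute_function)(**kw) for kw in looper)
    return [compute_function(**kw) for kw in looper]

def histogram_stack(index,sn):
    """Histogram the water distances for one concatenated frame-ion row (sketch)."""
    #---distances and scanrange are globals set in reload above
    return np.histogram(distances[sn][index],bins=scanrange)[0]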
def lipid_mesh_partners(**kwargs):
    """
    Count lipid partner combinations (pairs and triples) on the lipid mesh.
    """
    #---parameters
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    #---! deprecated random trials:
    n_trials = kwargs['calc']['specs']['n_trials']
    do_randomize = False
    #---globals for parallel
    global dat, results, rxmlook
    dat = kwargs['upstream']['lipid_mesh']
    nmols = [int(dat[i2s(mn, 0, 'nmol')]) for mn in range(2)]
    nframes = int(dat['nframes'])
    attrs, results = {}, {}
    #---code adapted from previous version at simuluxe and binding_combinator
    resnames = np.array(dat['resnames'])
    lipids = np.array(list(resnames[np.sort(np.unique(resnames, return_index=True)[1])]))
    reslist = list(np.array(resnames)[np.sort(np.unique(resnames, return_index=True)[1])])
    results['reslist'] = reslist
    #---collect statistics for pairs and triples
    for nn in [2, 3]:
        combos = np.array([''.join(j) for j in itertools.product(
            ''.join([str(i) for i in range(nn + 1)]), repeat=len(lipids))
            if sum([int(k) for k in j]) == nn])
        combonames = [tuple(v) for v in [np.concatenate([[lipids[ww]] * int(w)
            for ww, w in enumerate(l)]) for l in combos]]
        results['combos_%d' % nn] = combos
        results['combonames_%d' % nn] = combonames
        combolookup = np.sum([np.array(combonames) == r for r in reslist], axis=2).T
        combolookup_str = [''.join(['%s' % s for s in i]) for i in combolookup]
        results['combo_lookup_%d' % nn] = combolookup
        results['combo_lookup_str_%d' % nn] = combolookup_str
    #---determine monolayer-specific residue indices
    imono = dat['monolayer_indices']
    nmols = [np.sum(imono == i) for i in range(2)]
    rxm = [[np.array([np.where(np.where(imono == mn)[0] == i)[0][0]
        for i in np.where(np.all((imono == mn, resnames == rn), axis=0))[0]])
        for rn in reslist] for mn in range(2)]
    rxmlook = [np.zeros(n) for n in nmols]
    for mn in range(2):
        for ri, r in enumerate(rxm[mn]):
            if len(r) > 0: rxmlook[mn][r] = ri
    #---count in parallel
    counts_trials = dict([(nn, []) for nn in [2, 3]])
    counts_observed = dict([(nn, None) for nn in [2, 3]])
    for nn in [2, 3]:
        status('observations for nn=%d' % nn, tag='compute')
        looper = [dict(fr=fr, mn=mn, nn=nn) for mn in range(2) for fr in range(nframes)]
        incoming = basic_compute_loop(counter, looper)
        #---reindex the incoming data by monolayer, frame, combo
        counts_observed[nn] = np.concatenate(incoming).reshape(
            (2, nframes, len(results['combonames_%d' % nn])))
        if do_randomize:
            for trial in range(n_trials):
                results['rxmlook_rand'] = [np.random.permutation(r) for r in rxmlook]
                status('randomize trial for nn=%d trial=%d/%d' % (nn, trial + 1, n_trials),
                    tag='compute')
                looper = [dict(fr=fr, mn=mn, nn=nn, random=True)
                    for mn in range(2) for fr in range(nframes)]
                incoming = basic_compute_loop(counter, looper)
                counts_trials[nn].append(np.concatenate(incoming).reshape(
                    (2, nframes, len(results['combonames_%d' % nn]))))
    if do_randomize:
        counts_random = dict([(nn, np.concatenate([counts_trials[nn]])) for nn in [2, 3]])
    #---pack
    for nn in [2, 3]:
        if do_randomize:
            results['counts_random_%d' % nn] = np.array(counts_random[nn])
        results['counts_observed_%d' % nn] = np.array(counts_observed[nn])
    results.pop('rxmlook_rand', None)
    #---save rxmlook for counting lipids
    for mn in range(2):
        results['monolayer_residues_%d' % mn] = rxmlook[mn]
    return results, attrs
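# the helpers above are defined elsewhere in the codebase; the sketch below shows the
# assumed contract only: i2s builds the flat '%d.%d.<name>' keys used by the lipid_mesh
# data (the same convention appears in the rendering code below), and counter is a
# hypothetical parallel worker returning one count per entry of
# results['combonames_%d' % nn] for a given frame and monolayer; the 'simplices' key and
# the neighbor definition are assumptions, not the canonical implementation
def i2s(mn, fr, key):
    """Format the flat lipid_mesh key for monolayer mn, frame fr, and field key."""
    return '%d.%d.%s' % (mn, fr, key)

def counter(fr, mn, nn, random=False):
    """Tally lipid partner combinations for one frame and monolayer (sketch)."""
    lookup = results['rxmlook_rand'][mn] if random else rxmlook[mn]
    reslist = results['reslist']
    combonames = [tuple(sorted(c)) for c in results['combonames_%d' % nn]]
    simplices = np.array(dat[i2s(mn, fr, 'simplices')]).astype(int)  # assumed key
    # pairs come from simplex edges, triples from whole simplices (assumption)
    if nn == 2:
        neighborhoods = np.concatenate([simplices[:, cols] for cols in [(0, 1), (1, 2), (0, 2)]])
    else: neighborhoods = simplices
    counts = np.zeros(len(combonames), dtype=int)
    for group in neighborhoods:
        names = tuple(sorted([reslist[int(lookup[i])] for i in group]))
        if names in combonames: counts[combonames.index(names)] += 1
    return counts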
def plot_height_proximity_correlation(**kwargs):
    """
    Plot the instantaneous membrane height versus proximity to protein points.
    """
    import seaborn as sb
    # stash to globals so we can iterate on the plot aesthetics without recomputing
    if 'post' not in globals():
        global post, mesh, vecs, protein_pts
        post = {}
        sample_rate = 1
        for sn in sns:
            # points_all for the dimer simulations has dimensions frames, monomer, points, xyz
            protein_pts = data_prot[sn]['data']['points_all']
            vecs = data[sn]['data']['vecs']
            nframes = len(vecs)
            mesh = data[sn]['data']['mesh'].mean(axis=0)
            ngrid = mesh.shape[-2:]
            # remove the average height from each frame
            mesh -= np.tile(mesh.reshape(nframes, -1).mean(axis=1),
                (ngrid[0], ngrid[1], 1)).transpose((2, 0, 1))
            incoming = basic_compute_loop(
                compute_protein_proximity_height_correlation,
                looper=[dict(fr=fr) for fr in range(0, nframes, sample_rate)])
            post[sn] = dict(sizes=[len(i) for i in incoming],
                incoming=np.concatenate(incoming))
    # regular plot
    binw = 1.0
    axes, fig = square_tiles(1, figsize=(8, 8))
    ax = axes[0]
    pbc_spacing = min([min(data[sn]['data']['vecs'].mean(axis=0)[:2]) for sn in sns])
    colors = sb.color_palette("hls", len(sns))
    for snum, sn in enumerate(sns):
        rmax, zmax = [max([np.abs(v['incoming'][:, i]).max() for v in post.values()])
            for i in range(2)]
        bins = np.arange(0, rmax + binw, binw)
        rate = 1
        sample = post[sn]['incoming'][::rate]
        binned = [sample[np.all((sample.T[0] >= bins[ii], sample.T[0] <= bins[ii + 1]),
            axis=0)][:, 1] for ii, i in enumerate(bins[:-1])]
        means = np.array([np.mean(i) for i in binned])
        stds = np.array([np.std(i) for i in binned])
        ax.plot(bins[:-1], means, label=sn, color=colors[snum])
        ax.fill_between(bins[:-1], means - stds, means + stds, alpha=0.1,
            color=colors[snum])
    # truncate at half the smallest box vector; the expensive PBC-correct distance
    # treatment makes very little difference out to this range
    ax.set_xlim((0., pbc_spacing / 2.))
    ax.axhline(0, c='k', lw=1)
    plt.legend()
    plt.savefig(os.path.join(work.plotdir, 'fig.height_proximity.png'))
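# a minimal sketch of the per-frame worker assumed above: it pairs each membrane grid
# point's height with its distance to the nearest protein point and returns an array
# with columns (distance, height); the regular-grid construction and the lack of a
# minimum-image treatment are assumptions, not the canonical implementation
def compute_protein_proximity_height_correlation(fr):
    """Pair grid heights with the distance to the nearest protein point (sketch)."""
    from scipy.spatial.distance import cdist
    nx, ny = mesh.shape[-2:]
    # XY positions of the regular height grid for this frame
    xy = np.array(np.meshgrid(np.linspace(0, vecs[fr][0], nx),
        np.linspace(0, vecs[fr][1], ny), indexing='ij')).reshape(2, -1).T
    # protein points for this frame, flattened over monomers, XY only
    prot = protein_pts[fr].reshape(-1, 3)[:, :2]
    dists = cdist(xy, prot).min(axis=1)
    return np.array([dists, mesh[fr].reshape(-1)]).T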
nmol = len(m2i)
#---the mesh carries a variable number of points per frame depending on the PBC ghost
#---links, so keep only the first nmol points
points_inside = np.array([lipid_mesh['%d.%d.points' % (top_mono, fr)][:nmol]
    for fr in range(nframes)])
windows = np.array([np.arange(j, j + smooth_window)
    for j in np.arange(0, nframes - smooth_window)])
points_inside_smooth = np.array([points_inside[w].mean(axis=0) for w in windows])
#---render the snapshots in parallel
basic_compute_loop(
    compute_function=render_hydrogen_bonding_pattern,
    looper=[dict(fr=fr, frameno=frameno) for frameno, fr in enumerate(valid_frames)],
    run_parallel=True, debug=False)
#---render the movie when the snapshots are complete
try:
    # https://superuser.com/questions/1005315/interpolation-with-ffmpeg
    cmd = ('ffmpeg -i "snap.%05d.v1.png" ' +
        'mov.hydrogen_bonding_pattern.%s' % sn + '.mp4')
    bash(cmd, cwd=out_dn)
except Exception:
    status('failed to render the video. try "%s" in %s' % (cmd, out_dn))
del lipid_mesh
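#---bash is a utility from the calculation framework; a minimal sketch of the assumed
#---helper, which runs a shell command in a working directory and raises on failure
def bash(cmd, cwd='.'):
    """Run a shell command in a working directory (sketch of the assumed helper)."""
    import subprocess
    subprocess.check_call(cmd, shell=True, cwd=cwd)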
def hydration(grofile,trajfile,**kwargs):
    """
    Hydration code revamped from simuluxe on 2017.6.21.
    """
    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    debug = kwargs.get('debug',False)
    run_parallel = kwargs.get('run_parallel',True)
    start_job_time = time.time()
    #---prepare universe
    uni = MDAnalysis.Universe(grofile,trajfile)
    nframes = len(uni.trajectory)
    #---MDAnalysis uses Angstroms not nm
    lenscale = 10.
    #---get selections
    if ',' in work.meta[sn]['cation']: cation = work.meta[sn]['cation_relevant']
    else: cation = work.meta[sn]['cation']
    sel_ions = uni.select_atoms('name %s'%cation)
    sel_lipids_str = ' or '.join(['resname %s'%i for i in work.vars['selectors']['resnames_lipid']])
    sel_lipids = uni.select_atoms(sel_lipids_str)
    #---atom subselection
    atom_filter = calc['specs'].get('atom_filter',None)
    if atom_filter:
        sel_lipids = uni.select_atoms('(%s) and (%s)'%(sel_lipids_str,' or '.join(['name %s'%i
            for i in np.unique([n for n in sel_lipids.names if re.match(atom_filter,n)])])))
    #---handle the distance metric and pass it to the distance finder
    distance_metric = calc['specs'].get('distance_metric',None)
    distance_args = {'distance_metric':distance_metric}
    #---we use the water oxygen only
    sel_water = uni.select_atoms('name OW')
    global pts_ions,pts_water,pts_lipids,vecs,midplanes
    #---the height distance metric needs the average z for each frame
    if distance_metric=='z':
        midplanes = np.array([i.mean() for i in kwargs['upstream']['undulations']['mesh'].mean(axis=0)])
    #---cache the points
    pts_ions = np.zeros((nframes,len(sel_ions),3))
    pts_lipids = np.zeros((nframes,len(sel_lipids),3))
    pts_water = np.zeros((nframes,len(sel_water),3))
    vecs = np.zeros((nframes,3))
    start = time.time()
    for fr in range(nframes):
        status('caching coordinates',tag='compute',i=fr,looplen=nframes,start=start)
        uni.trajectory[fr]
        pts_ions[fr] = sel_ions.positions/lenscale
        pts_lipids[fr] = sel_lipids.positions/lenscale
        pts_water[fr] = sel_water.positions/lenscale
        vecs[fr] = uni.dimensions[:3]/lenscale
    #---prepare arguments for the compute functions
    hydration_cutoff = work.vars['hydration_cutoffs'][cation]/lenscale
    out_args = dict(cutoff=hydration_cutoff)
    #---for each ion count the waters within the shell
    shell_counts = basic_compute_loop(
        compute_function=shell_counter,
        looper=[dict(fr=fr,**out_args) for fr in range(nframes)],
        run_parallel=run_parallel,debug=None)
    #---select valid frames
    valid_frames_shell_counts = np.array([i for i in range(len(shell_counts)) if len(shell_counts[i])>0])
    if len(valid_frames_shell_counts)==0:
        raise Exception('something is amiss: there are no valid frames for the shell counts')
    shell_counts = np.array(shell_counts)[valid_frames_shell_counts.astype(int)]
    #---for each ion get the minimum distance to any lipid
    near_lipids = basic_compute_loop(
        compute_function=minimum_distances,
        looper=[dict(fr=fr,**distance_args) for fr in range(nframes)],
        run_parallel=run_parallel,debug=None)
    #---select valid frames
    valid_frames_near_lipids = np.array([i for i in range(len(near_lipids)) if len(near_lipids[i])>0])
    near_lipids = np.array(near_lipids)[valid_frames_near_lipids.astype(int)]
    #---package the dataset
    result,attrs = {},{}
    attrs['hydration_cutoff'] = hydration_cutoff
    result['nframes'] = np.array(nframes)
    result['shell_counts'] = shell_counts
    result['valid_frames_shell_counts'] = valid_frames_shell_counts
    result['near_lipids'] = near_lipids
    result['valid_frames_near_lipids'] = valid_frames_near_lipids
    status('compute job lasted %.1fmin'%((time.time()-start_job_time)/60.),tag='time')
    return result,attrs
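#---the parallel workers above are defined elsewhere; minimal sketches follow, assuming
#---shell_counter counts water oxygens within the cutoff of each ion and
#---minimum_distances returns each ion's distance to the nearest lipid atom (or its
#---height above the midplane for the 'z' metric); the simple non-periodic distances
#---here are an assumption, not the canonical implementation
def shell_counter(fr,cutoff):
    """Count water oxygens within the cutoff of each ion for one frame (sketch)."""
    from scipy.spatial import distance
    dists = distance.cdist(pts_ions[fr],pts_water[fr])
    return (dists<=cutoff).sum(axis=1)

def minimum_distances(fr,distance_metric=None):
    """Minimum ion-to-lipid distance (or height above the midplane) for one frame (sketch)."""
    from scipy.spatial import distance
    if distance_metric=='z':
        return np.abs(pts_ions[fr][:,2]-midplanes[fr])
    return distance.cdist(pts_ions[fr],pts_lipids[fr]).min(axis=1)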
def electron_density_profiles(**kwargs):
    """
    Compute the electron density profiles.
    """
    global vecs, coords, nbins, groups, midpoint, charges
    # hardcoded electron counts keyed by atom-name regex
    chargedict = {'^N(?!A$)': 7, '^C[0-9]+': 6, '^CL$': 17, '^H': 1, '^O': 8,
        '^P': 15, '^Cal': 18, '^MG': 10, '^NA': 11, '^S': 16, 'K': 18}
    # we consider residues and then the following regular expressions
    group_regexes = kwargs['calc']['specs'].get('extra_regexes',
        ['.+', '^(OW)|(HW(1|2))$', '^C[0-9]+'])
    # get the reference z for each frame
    bilayer_coms = kwargs['upstream']['lipid_abstractor']['points']
    imono = kwargs['upstream']['lipid_abstractor']['monolayer_indices']
    #! assume upstream lipid_abstractor is correct and the resulting points are not broken over PBCs
    midpoint = np.array([bilayer_coms[:, imono == mn][:, :, 2].mean(axis=1)
        for mn in range(2)]).mean(axis=0)
    # get the trajectory
    grofile, trajfile = kwargs['structure'], kwargs['trajectory']
    uni = MDAnalysis.Universe(grofile, trajfile)
    nframes = len(uni.trajectory)
    # MDAnalysis uses Angstroms not nm
    lenscale = 10.
    # choose a number of bins
    bin_size = kwargs['calc']['specs']['bin_size']
    vecs_upstream = kwargs['upstream']['lipid_abstractor']['vecs']
    # round the number of bins to ensure everything is flush
    nbins = np.round(vecs_upstream[:, 2].mean() / bin_size).astype(int)
    # collect coordinates
    sel = uni.select_atoms('all')
    # assign charges
    namelist = uni.atoms.names
    resnamelist = list(set(uni.atoms.resnames))
    # charge dictionary for the atoms in this particular system
    chargedict_obs = dict([(name, [chargedict[key] for key in chargedict if re.match(key, name)])
        for name in np.unique(namelist)])
    unclear_charges = dict([(key, val) for key, val in chargedict_obs.items() if len(val) != 1])
    if any(unclear_charges):
        raise Exception('charges for these atoms were not uniquely specified: %s' % unclear_charges)
    chargedict_obs = dict([(key, val[0]) for key, val in chargedict_obs.items()])
    charges = np.array([chargedict_obs[n] for n in namelist])
    # identify atoms for each residue type
    groups = [np.where(uni.atoms.resnames == r)[0] for r in resnamelist]
    groups += [np.array([i for i, j in enumerate(namelist) if re.match(reg, j)])
        for reg in group_regexes]
    # cache the points
    coords = np.zeros((nframes, len(sel), 3))
    vecs = np.zeros((nframes, 3))
    for fr in range(nframes):
        status('loading frame', tag='load', i=fr, looplen=nframes)
        uni.trajectory[fr]
        vecs[fr] = uni.trajectory[fr].dimensions[:3] / lenscale
        coords[fr] = np.array(sel.positions) / lenscale
    # make sure the vectors match the upstream calculation
    if not np.all(vecs == vecs_upstream):
        raise Exception('vectors do not match upstream lipid_abstractor')
    # compute
    looper = [dict(fr=fr) for fr in range(nframes)]
    incoming = basic_compute_loop(compute_edp_single, looper, run_parallel=True)
    # pack
    results, attrs = {}, {}
    attrs['group_regexes'] = group_regexes
    for gnum, group in enumerate(groups):
        results['group_%d' % gnum] = group
    results['resnames'] = resnamelist
    results['tabulated'] = np.array(incoming)
    attrs['bin_size'] = bin_size
    results['midpoint'] = midpoint
    attrs['nbins'] = nbins
    results['vecs'] = vecs
    results['charges'] = charges
    return results, attrs
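# the per-frame worker above is defined elsewhere; a minimal sketch, assuming it
# histograms the charge-weighted z positions of each atom group into nbins bins
# centered on the bilayer midpoint (the symmetric binning range is an assumption)
def compute_edp_single(fr):
    """Bin charge-weighted z positions for each group in one frame (sketch)."""
    zs = coords[fr][:, 2] - midpoint[fr]
    half_box = vecs[fr][2] / 2.
    tabulated = []
    for group in groups:
        counts, _ = np.histogram(zs[group], bins=nbins, range=(-half_box, half_box),
            weights=charges[group])
        tabulated.append(counts)
    return np.array(tabulated)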
def hydration_distribution(grofile, trajfile, **kwargs):
    """
    Compute water-to-ion distances for the radial distribution function (RDF), a.k.a. g(r),
    of water around ions, along with ion-to-lipid distances so the distributions can be
    filtered by proximity to the bilayer downstream.
    """
    #---unpack
    sn = kwargs['sn']
    work = kwargs['workspace']
    calc = kwargs['calc']
    debug = kwargs.get('debug', False)
    run_parallel = kwargs.get('run_parallel', True)
    start_job_time = time.time()
    #---number of nearest waters to retain for each ion
    knn = calc['specs'].get('k_nearest_waters', 200)
    #---prepare universe
    uni = MDAnalysis.Universe(grofile, trajfile)
    nframes = len(uni.trajectory)
    #---MDAnalysis uses Angstroms not nm
    lenscale = 10.
    #---collect selection strings
    lipid_resnames = get_lipid_resnames()
    cation_names = work.meta[sn].get('cations', work.meta[sn].get('cation', None))
    if type(cation_names) != list: cation_names = [cation_names]
    #---define selections
    sel_proxy = uni.select_atoms(' or '.join(['resname %s' % i for i in lipid_resnames]))
    sel_subject = uni.select_atoms(' or '.join(['name %s' % i for i in cation_names]))
    #---we use the oxygen to denote the water
    sel_water = uni.select_atoms('resname SOL and name OW')
    #---prepare coordinates for each frame
    st = time.time()
    global vecs, subject_coords, proxy_coords, water_coords
    vecs, subject_coords, proxy_coords, water_coords = [], [], [], []
    #---purposefully profligate with the memory so this goes quickly
    for fr in range(nframes):
        status('caching coordinates', tag='compute', i=fr, looplen=nframes, start=st)
        uni.trajectory[fr]
        vecs.append(uni.dimensions[:3] / lenscale)
        subject_coords.append(sel_subject.positions / lenscale)
        proxy_coords.append(sel_proxy.positions / lenscale)
        water_coords.append(sel_water.positions / lenscale)
    status('completed caching in %.1f minutes' % ((time.time() - st) / 60.), tag='status')
    #---helper to convert pairs back to advanced indexing
    aind = lambda x: tuple(x.T)
    #---loop over frames in parallel
    st = time.time()
    looper = [dict(fr=fr, knn=knn) for fr in range(nframes)]
    incoming = basic_compute_loop(hydration_distribution_framewise, looper=looper)
    water_distances, lipid_distances, valid_frames = zip(*incoming)
    valid_frames = [fr for fr in valid_frames if fr is not None]
    water_distances = [water_distances[fr] for fr in valid_frames]
    lipid_distances = [lipid_distances[fr] for fr in valid_frames]
    #---package the dataset
    result, attrs = {}, {}
    result['water_distances'] = np.array(water_distances)
    result['lipid_distances'] = np.array(lipid_distances)
    result['valid_frames'] = valid_frames
    result['nframes'] = np.array(nframes)
    result['cation_resids'] = sel_subject.resids
    return result, attrs
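#---the per-frame worker above is defined elsewhere; a minimal sketch, assuming it returns
#---the distances from each cation to its knn nearest water oxygens along with each
#---cation's distance to the nearest lipid atom, and signals a bad frame with a None
#---frame index; the non-periodic distances are an assumption, not the canonical code
def hydration_distribution_framewise(fr, knn):
    """Collect water and lipid distances around each cation for one frame (sketch)."""
    from scipy.spatial import distance
    try:
        dists_water = distance.cdist(subject_coords[fr], water_coords[fr])
        #---keep only the knn nearest waters for each cation
        water_near = np.sort(dists_water, axis=1)[:, :knn]
        lipid_near = distance.cdist(subject_coords[fr], proxy_coords[fr]).min(axis=1)
        return water_near, lipid_near, fr
    except Exception: return None, None, None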