def run(project, pdb, traj_fn, atom_indices, alt_indices, permute_indices):
    """Align a trajectory file against a reference and return the distances.

    The trajectory at ``traj_fn`` is loaded with ``project.Conf`` as its
    ``Conf``; both it and ``pdb`` are passed through
    ``LPRMSD.prepare_trajectory`` and then handed to
    ``LPRMSD.one_to_all_aligned`` with reference index 0.

    Parameters
    ----------
    project
        Object whose ``Conf`` attribute is forwarded to the trajectory loader.
    pdb
        Reference structure(s); only index 0 is used as the reference.
    traj_fn
        Path of the trajectory file to load.
    atom_indices, alt_indices, permute_indices
        Index sets forwarded to ``LPRMSD``.

    Returns
    -------
    The first element of the pair returned by ``one_to_all_aligned``.
    The aligned coordinates (second element) are discarded.
    """
    loaded = Trajectory.load_trajectory_file(traj_fn, Conf=project.Conf)

    # Swap in a different metric here if desired.  Note that LPRMSD takes
    # (atom, permute, alt), which is not the order of this function's
    # parameters.
    lprmsd = LPRMSD(atom_indices, permute_indices, alt_indices)
    reference = lprmsd.prepare_trajectory(pdb)
    target = lprmsd.prepare_trajectory(loaded)

    # Report the coordinate array shapes: reference first, then trajectory.
    for prepared in (reference, target):
        print(prepared['XYZList'].shape)

    distances, _aligned_xyz = lprmsd.one_to_all_aligned(reference, target, 0)
    print(distances)
    return distances
def run(project, assignments, conformations_per_state, states, output_dir,
        gens_file, atom_indices, permute_indices, alt_indices, total_memory):
    """Write one reference PDB and one aligned XTC of member frames per state.

    For every requested state, frames assigned to it are gathered into a
    single trajectory, aligned onto that state's reference structure with
    ``LPRMSD.one_to_all_aligned`` and saved in ``output_dir``.  Reference
    structures are read from ``gens_file`` when that path exists; otherwise
    one assigned frame per state is drawn at random (this requires all
    trajectories to be held in memory).

    Parameters
    ----------
    project
        Project object.  This function reads ``project['TrajFilePath']`` and
        ``project['NumTrajs']`` and calls ``LoadTraj(i)`` and
        ``GetEmptyTrajectory()``.
    assignments
        2-D array of state ids indexed as ``[trajectory, frame]``.
    conformations_per_state
        ``'all'`` to keep every assigned frame, or an integer cap.  With a
        cap, the state's frame list is shuffled in place and the first
        ``conformations_per_state`` entries are kept.
    states
        ``"all"`` for every state from 0 to ``assignments.max()``, or a
        sequence of state ids.
    output_dir
        Destination directory; created if it does not exist.
    gens_file
        Path to a trajectory file of reference structures in which frame
        ``s`` is the reference for state ``s``.
    atom_indices, permute_indices, alt_indices
        Forwarded to ``LPRMSD`` in that order.
    total_memory
        Memory budget.  It is multiplied by 1073741824 (2**30) before being
        compared with five times the on-disk trajectory size, so it is
        presumably expressed in GiB.

    Raises
    ------
    ValueError
        If a requested state has no assigned frames.
    Exception
        If ``gens_file`` does not exist and the trajectories were not all
        loaded into memory.
    """
    if states == "all":
        states = np.arange(assignments.max() + 1)

    # {state: [(traj1, frame1), (traj1, frame3), (traj2, frame1), ...]}
    inverse_assignments = defaultdict(list)
    for (i, j), state in np.ndenumerate(assignments):
        inverse_assignments[state].append((i, j))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print("Setting up the metric.")
    rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices)

    # Zero-pad state numbers in the file names to the width of the largest
    # requested state.  The patterns are ordinary %-format strings, so no
    # eval() is needed to apply them.
    digits = len(str(max(states)))
    xtc_pattern = 'Cluster-%%0%ii.xtc' % digits

    # Reference structures supplied on disk, one frame per state id.
    have_gens = os.path.exists(gens_file)
    if have_gens:
        gens_traj = Trajectory.load_trajectory_file(gens_file)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        pdb_pattern = 'Generator-%%0%ii.pdb' % digits

    print("Loading up the trajectories.")
    _nfiles, traj_bytes = get_size(project['TrajFilePath'])
    load_all = False
    MaxMem = 0.0
    # LPW: heuristic deciding whether to hold every trajectory in memory or
    # re-read from disk per state.  The script appears to need roughly 5x the
    # HDF file size in memory.
    if (traj_bytes * 5) < total_memory * 1073741824:
        print("Loading all trajectories into memory.")
        load_all = True
        AllTraj = [project.LoadTraj(i) for i in np.arange(project["NumTrajs"])]

    if not have_gens:
        if not load_all:
            raise Exception((
                'To get away with not supplying a Gens.lh5 structure to align to for each state '
                'you need to have enough memory to load all the trajectories simultaniously. This could be worked around...'
            ))
        print('Randomly Sampling from state for structure to align everything to')
        centers_list = []
        for s in states:
            members = inverse_assignments[s]
            if not members:
                # Checked here so the caller gets the descriptive error
                # rather than the one randint raises for an empty range.
                raise ValueError('No assignments to state! %s' % s)
            # Pick one of *this state's* (traj, frame) pairs.
            traj, frame = members[np.random.randint(len(members))]
            centers_list.append(AllTraj[traj][frame])
        gens_traj = concatenate_trajectories(centers_list)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        pdb_pattern = 'Center-%%0%ii.pdb' % digits

    cluster_traj = project.GetEmptyTrajectory()

    # Loop through the states.
    for position, s in enumerate(states):
        # Sampled references were appended in the order of ``states`` and so
        # are addressed by position; references from ``gens_file`` are
        # addressed by state id.
        ref_index = s if have_gens else position

        members = inverse_assignments[s]
        if not members:
            raise ValueError('No assignments to state! %s' % s)

        if conformations_per_state == 'all':
            confs = members
        else:
            random.shuffle(members)
            if len(members) >= conformations_per_state:
                confs = members[0:conformations_per_state]
            else:
                confs = members
                print('Not enough assignments in state %s' % s)

        FrameDict = {}
        for (traj, frame) in confs:
            FrameDict.setdefault(traj, []).append(frame)

        # Build a single trajectory from the frames selected for this state,
        # discarding coordinates left over from the previous state.
        if "XYZList" in cluster_traj:
            cluster_traj.pop("XYZList")
        # The trailing comma after print(...) is the Python 2 print-statement
        # form that suppresses the newline, keeping each state's progress on
        # one output line.
        print("Generator %i" % s),
        TrajNums = set([pair[0] for pair in confs])
        for i in TrajNums:
            wanted = np.array(FrameDict[i])
            if load_all:
                T = AllTraj[i][wanted]
            else:
                T = project.LoadTraj(i)[wanted]
            cluster_traj += T

        print(" loaded %i conformations, aligning" % len(cluster_traj)),
        # Prepare the trajectory, align to the reference, and replace the
        # coordinates with the aligned ones.
        p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj)
        rmsd, xout = rmsd_metric.one_to_all_aligned(
            p_gens_traj, p_cluster_traj, ref_index)
        p_cluster_traj['XYZList'] = xout.copy()

        # Save the reference / cluster to a PDB / XTC file.
        pdb_path = os.path.join(output_dir, pdb_pattern % s)
        xtc_path = os.path.join(output_dir, xtc_pattern % s)
        this_gen_traj = p_gens_traj[ref_index]
        print(", saving PDB to %s" % pdb_path),
        this_gen_traj.save_to_pdb(pdb_path)
        print(", saving XTC to %s" % xtc_path),
        p_cluster_traj.save_to_xtc(xtc_path)
        print(", saved")

        # Track the peak resident set size seen so far (ru_maxrss / 2**20).
        # NOTE(review): nothing in this function reads MaxMem afterwards.
        NowMem = float(resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss) / 1048576
        if NowMem > MaxMem:
            MaxMem = NowMem
def run(
    project,
    assignments,
    conformations_per_state,
    states,
    output_dir,
    gens_file,
    atom_indices,
    permute_indices,
    alt_indices,
    total_memory,
):
    """Write one reference PDB and one aligned XTC of member frames per state.

    For every requested state, frames assigned to it are gathered into a
    single trajectory, aligned onto that state's reference structure with
    ``LPRMSD.one_to_all_aligned`` and saved in ``output_dir``.  Reference
    structures are read from ``gens_file`` when that path exists; otherwise
    one assigned frame per state is drawn at random (this requires all
    trajectories to be held in memory).

    Parameters
    ----------
    project
        Project object.  This function reads ``project["TrajFilePath"]`` and
        ``project["NumTrajs"]`` and calls ``LoadTraj(i)`` and
        ``GetEmptyTrajectory()``.
    assignments
        2-D array of state ids indexed as ``[trajectory, frame]``.
    conformations_per_state
        ``"all"`` to keep every assigned frame, or an integer cap.  With a
        cap, the state's frame list is shuffled in place and the first
        ``conformations_per_state`` entries are kept.
    states
        ``"all"`` for every state from 0 to ``assignments.max()``, or a
        sequence of state ids.
    output_dir
        Destination directory; created if it does not exist.
    gens_file
        Path to a trajectory file of reference structures in which frame
        ``s`` is the reference for state ``s``.
    atom_indices, permute_indices, alt_indices
        Forwarded to ``LPRMSD`` in that order.
    total_memory
        Memory budget.  It is multiplied by 1073741824 (2**30) before being
        compared with five times the on-disk trajectory size, so it is
        presumably expressed in GiB.

    Raises
    ------
    ValueError
        If a requested state has no assigned frames.
    Exception
        If ``gens_file`` does not exist and the trajectories were not all
        loaded into memory.
    """
    if states == "all":
        states = np.arange(assignments.max() + 1)

    # {state: [(traj1, frame1), (traj1, frame3), (traj2, frame1), ...]}
    inverse_assignments = defaultdict(list)
    for (i, j), state in np.ndenumerate(assignments):
        inverse_assignments[state].append((i, j))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print("Setting up the metric.")
    rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices)

    # Zero-pad state numbers in the file names to the width of the largest
    # requested state.  The patterns are ordinary %-format strings, so no
    # eval() is needed to apply them.
    digits = len(str(max(states)))
    xtc_pattern = "Cluster-%%0%ii.xtc" % digits

    # Reference structures supplied on disk, one frame per state id.
    have_gens = os.path.exists(gens_file)
    if have_gens:
        gens_traj = Trajectory.load_trajectory_file(gens_file)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        pdb_pattern = "Generator-%%0%ii.pdb" % digits

    print("Loading up the trajectories.")
    _nfiles, traj_bytes = get_size(project["TrajFilePath"])
    load_all = False
    MaxMem = 0.0
    # LPW: heuristic deciding whether to hold every trajectory in memory or
    # re-read from disk per state.  The script appears to need roughly 5x the
    # HDF file size in memory.
    if (traj_bytes * 5) < total_memory * 1073741824:
        print("Loading all trajectories into memory.")
        load_all = True
        AllTraj = [project.LoadTraj(i) for i in np.arange(project["NumTrajs"])]

    if not have_gens:
        if not load_all:
            raise Exception(
                (
                    "To get away with not supplying a Gens.lh5 structure to align to for each state "
                    "you need to have enough memory to load all the trajectories simultaniously. This could be worked around..."
                )
            )
        print("Randomly Sampling from state for structure to align everything to")
        centers_list = []
        for s in states:
            members = inverse_assignments[s]
            if not members:
                # Checked here so the caller gets the descriptive error
                # rather than the one randint raises for an empty range.
                raise ValueError("No assignments to state! %s" % s)
            # Pick one of *this state's* (traj, frame) pairs.
            traj, frame = members[np.random.randint(len(members))]
            centers_list.append(AllTraj[traj][frame])
        gens_traj = concatenate_trajectories(centers_list)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        pdb_pattern = "Center-%%0%ii.pdb" % digits

    cluster_traj = project.GetEmptyTrajectory()

    # Loop through the states.
    for position, s in enumerate(states):
        # Sampled references were appended in the order of ``states`` and so
        # are addressed by position; references from ``gens_file`` are
        # addressed by state id.
        ref_index = s if have_gens else position

        members = inverse_assignments[s]
        if not members:
            raise ValueError("No assignments to state! %s" % s)

        if conformations_per_state == "all":
            confs = members
        else:
            random.shuffle(members)
            if len(members) >= conformations_per_state:
                confs = members[0:conformations_per_state]
            else:
                confs = members
                print("Not enough assignments in state %s" % s)

        FrameDict = {}
        for (traj, frame) in confs:
            FrameDict.setdefault(traj, []).append(frame)

        # Build a single trajectory from the frames selected for this state,
        # discarding coordinates left over from the previous state.
        if "XYZList" in cluster_traj:
            cluster_traj.pop("XYZList")
        # The trailing comma after print(...) is the Python 2 print-statement
        # form that suppresses the newline, keeping each state's progress on
        # one output line.
        print("Generator %i" % s),
        TrajNums = set([pair[0] for pair in confs])
        for i in TrajNums:
            wanted = np.array(FrameDict[i])
            if load_all:
                T = AllTraj[i][wanted]
            else:
                T = project.LoadTraj(i)[wanted]
            cluster_traj += T

        print(" loaded %i conformations, aligning" % len(cluster_traj)),
        # Prepare the trajectory, align to the reference, and replace the
        # coordinates with the aligned ones.
        p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj)
        rmsd, xout = rmsd_metric.one_to_all_aligned(
            p_gens_traj, p_cluster_traj, ref_index
        )
        p_cluster_traj["XYZList"] = xout.copy()

        # Save the reference / cluster to a PDB / XTC file.
        pdb_path = os.path.join(output_dir, pdb_pattern % s)
        xtc_path = os.path.join(output_dir, xtc_pattern % s)
        this_gen_traj = p_gens_traj[ref_index]
        print(", saving PDB to %s" % pdb_path),
        this_gen_traj.save_to_pdb(pdb_path)
        print(", saving XTC to %s" % xtc_path),
        p_cluster_traj.save_to_xtc(xtc_path)
        print(", saved")

        # Track the peak resident set size seen so far (ru_maxrss / 2**20).
        # NOTE(review): nothing in this function reads MaxMem afterwards.
        NowMem = float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576
        if NowMem > MaxMem:
            MaxMem = NowMem