Пример #1
0
def save(confs_by_state, states, style, format, outdir):
    """Write the sampled conformations into `outdir`.

    `style` selects the file layout:

    * 'sep' -- one file per conformation: State<state>-<index>.<format>
    * 'tps' -- one file per state: State<state>.<format>
    * 'one' -- a single file, Confs.<format>, holding the concatenation of
      every entry of `confs_by_state`

    `states[i]` supplies the state number used in the file names for
    `confs_by_state[i]`.  Every target path is passed to
    arglib.die_if_path_exists() before anything is written to it.

    Raises ValueError for any other `style`.
    """
    if style == 'one':
        # Everything is merged into one trajectory and written once.
        target = os.path.join(outdir, 'Confs.%s' % format)
        arglib.die_if_path_exists(target)

        logger.info("Saving file: %s" % target)
        merged = concatenate_trajectories(confs_by_state)
        merged.save(target)
        return

    if style == 'tps':
        # One file per state, containing that state's whole trajectory.
        for idx, state_traj in enumerate(confs_by_state):
            basename = 'State%d.%s' % (states[idx], format)
            target = os.path.join(outdir, basename)
            arglib.die_if_path_exists(target)

            logger.info("Saving file: %s" % target)
            state_traj.save(target)
        return

    if style != 'sep':
        raise ValueError('Invalid style: %s' % style)

    # 'sep': every conformation of every state gets its own file.
    for idx, state_traj in enumerate(confs_by_state):
        for frame_no in xrange(len(state_traj)):
            basename = 'State%d-%d.%s' % (states[idx], frame_no, format)
            target = os.path.join(outdir, basename)
            arglib.die_if_path_exists(target)

            logger.info("Saving file: %s" % target)
            single_frame = state_traj[frame_no]
            single_frame.save(target)
def save(confs_by_state, states, style, format, outdir):
    """Save the selected conformations to disk under `outdir`.

    Supported values of `style`:

    * 'sep' -- each conformation in its own State<state>-<index>.<format>
    * 'tps' -- each state's trajectory in its own State<state>.<format>
    * 'one' -- all of `confs_by_state` concatenated into Confs.<format>

    The state number in a file name is `states[i]` for `confs_by_state[i]`.
    Each output path goes through arglib.die_if_path_exists() and is logged
    before the corresponding save call.

    Raises ValueError when `style` is none of the above.
    """
    if style not in ('sep', 'tps', 'one'):
        raise ValueError('Invalid style: %s' % style)

    def _claim_path(basename):
        # Common prologue of every write: build the path, run the
        # existing-path check, and log where the data is about to go.
        path = os.path.join(outdir, basename)
        arglib.die_if_path_exists(path)

        logger.info("Saving file: %s" % path)
        return path

    if style == 'sep':
        for pos, traj in enumerate(confs_by_state):
            for k in xrange(len(traj)):
                path = _claim_path('State%d-%d.%s' % (states[pos], k, format))
                traj[k].save(path)
    elif style == 'tps':
        for pos, traj in enumerate(confs_by_state):
            path = _claim_path('State%d.%s' % (states[pos], format))
            traj.save(path)
    else:
        # style == 'one'
        path = _claim_path('Confs.%s' % format)
        concatenate_trajectories(confs_by_state).save(path)
Пример #3
0
def run(project, assignments, conformations_per_state, states, output_dir,
        gens_file, atom_indices, permute_indices, alt_indices, total_memory):
    if states == "all":
        states = np.arange(assignments.max() + 1)
    # This is a dictionary: {generator : ((traj1, frame1), (traj1, frame3), (traj2, frame1), ... )}
    inverse_assignments = defaultdict(lambda: [])
    for i in xrange(assignments.shape[0]):
        for j in xrange(assignments.shape[1]):
            inverse_assignments[assignments[i, j]].append((i, j))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    print "Setting up the metric."
    rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices)
    # This trickery allows us to get the correct number of leading
    # zeros in the output file name no matter how many generators we have
    digits = len(str(max(states)))
    # Create a trajectory of generators and prepare it.
    if os.path.exists(gens_file):
        gens_traj = Trajectory.load_trajectory_file(gens_file)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = '\"Generator-%%0%ii.pdb\"' % digits

    formstr_xtc = '\"Cluster-%%0%ii.xtc\"' % digits
    print "Loading up the trajectories."
    traj_nfiles, traj_bytes = get_size(project['TrajFilePath'])
    LoadAll = 0
    MaxMem = 0.0
    # LPW This is my hack that decides whether to load trajectories into memory, or to read them from disk.
    if (
            traj_bytes * 5
    ) < total_memory * 1073741824:  # It looks like the Python script uses roughly 5x the HDF file size in terms of memory.
        print "Loading all trajectories into memory."
        LoadAll = 1
        AllTraj = [project.LoadTraj(i) for i in np.arange(project["NumTrajs"])]
        #print "After loading trajectories, memory usage is % .3f GB" % (float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576)

    if not os.path.exists(gens_file):
        if not 'AllTraj' in locals():
            raise Exception((
                'To get away with not supplying a Gens.lh5 structure to align to for each state '
                'you need to have enough memory to load all the trajectories simultaniously. This could be worked around...'
            ))
        print 'Randomly Sampling from state for structure to align everything to'
        centers_list = []
        for s in states:
            chosen = inverse_assignments[np.random.randint(
                len(inverse_assignments[s]))]
            centers_list.append(AllTraj[chosen[0]][chosen[1]])
        gens_traj = concatenate_trajectories(centers_list)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = '\"Center-%%0%ii.pdb\"' % digits

    cluster_traj = project.GetEmptyTrajectory()
    # Loop through the generators.
    for s in states:
        if len(inverse_assignments[s]) == 0:
            raise ValueError('No assignments to state! %s' % s)
        if conformations_per_state == 'all':
            confs = inverse_assignments[s]
        else:
            random.shuffle(inverse_assignments[s])
            if len(inverse_assignments[s]) >= conformations_per_state:
                confs = inverse_assignments[s][0:conformations_per_state]
            else:
                confs = inverse_assignments[s]
                print 'Not enough assignments in state %s' % s
        FrameDict = {}
        for (traj, frame) in confs:
            FrameDict.setdefault(traj, []).append(frame)
        # Create a single trajectory corresponding to the frames that
        # belong to the current generator.
        if "XYZList" in cluster_traj:
            cluster_traj.pop("XYZList")
        print "Generator %i" % s,
        TrajNums = set([i[0] for i in confs])
        for i in TrajNums:
            if LoadAll:
                T = AllTraj[i][np.array(FrameDict[i])]
            else:
                T = project.LoadTraj(i)[np.array(FrameDict[i])]
            cluster_traj += T
        print " loaded %i conformations, aligning" % len(cluster_traj),
        # Prepare the trajectory, align to the generator, and reassign the coordinates.
        p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj)
        rmsd, xout = rmsd_metric.one_to_all_aligned(p_gens_traj,
                                                    p_cluster_traj, s)
        p_cluster_traj['XYZList'] = xout.copy()
        # Now save the generator / cluster to a PDB / XTC file.
        outpdb = eval(formstr_pdb) % s
        outxtc = eval(formstr_xtc) % s
        this_gen_traj = p_gens_traj[s]
        print ", saving PDB to %s" % os.path.join(output_dir, outpdb),
        this_gen_traj.save_to_pdb(os.path.join(output_dir, outpdb))
        print ", saving XTC to %s" % os.path.join(output_dir, outxtc),
        p_cluster_traj.save_to_xtc(os.path.join(output_dir, outxtc))
        print ", saved"
        NowMem = float(resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss) / 1048576
        if NowMem > MaxMem:
            MaxMem = NowMem
Пример #4
0
def run(
    project,
    assignments,
    conformations_per_state,
    states,
    output_dir,
    gens_file,
    atom_indices,
    permute_indices,
    alt_indices,
    total_memory,
):
    if states == "all":
        states = np.arange(assignments.max() + 1)
    # This is a dictionary: {generator : ((traj1, frame1), (traj1, frame3), (traj2, frame1), ... )}
    inverse_assignments = defaultdict(lambda: [])
    for i in xrange(assignments.shape[0]):
        for j in xrange(assignments.shape[1]):
            inverse_assignments[assignments[i, j]].append((i, j))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    print "Setting up the metric."
    rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices)
    # This trickery allows us to get the correct number of leading
    # zeros in the output file name no matter how many generators we have
    digits = len(str(max(states)))
    # Create a trajectory of generators and prepare it.
    if os.path.exists(gens_file):
        gens_traj = Trajectory.load_trajectory_file(gens_file)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = '"Generator-%%0%ii.pdb"' % digits

    formstr_xtc = '"Cluster-%%0%ii.xtc"' % digits
    print "Loading up the trajectories."
    traj_nfiles, traj_bytes = get_size(project["TrajFilePath"])
    LoadAll = 0
    MaxMem = 0.0
    # LPW This is my hack that decides whether to load trajectories into memory, or to read them from disk.
    if (
        traj_bytes * 5
    ) < total_memory * 1073741824:  # It looks like the Python script uses roughly 5x the HDF file size in terms of memory.
        print "Loading all trajectories into memory."
        LoadAll = 1
        AllTraj = [project.LoadTraj(i) for i in np.arange(project["NumTrajs"])]
        # print "After loading trajectories, memory usage is % .3f GB" % (float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576)

    if not os.path.exists(gens_file):
        if not "AllTraj" in locals():
            raise Exception(
                (
                    "To get away with not supplying a Gens.lh5 structure to align to for each state "
                    "you need to have enough memory to load all the trajectories simultaniously. This could be worked around..."
                )
            )
        print "Randomly Sampling from state for structure to align everything to"
        centers_list = []
        for s in states:
            chosen = inverse_assignments[np.random.randint(len(inverse_assignments[s]))]
            centers_list.append(AllTraj[chosen[0]][chosen[1]])
        gens_traj = concatenate_trajectories(centers_list)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = '"Center-%%0%ii.pdb"' % digits

    cluster_traj = project.GetEmptyTrajectory()
    # Loop through the generators.
    for s in states:
        if len(inverse_assignments[s]) == 0:
            raise ValueError("No assignments to state! %s" % s)
        if conformations_per_state == "all":
            confs = inverse_assignments[s]
        else:
            random.shuffle(inverse_assignments[s])
            if len(inverse_assignments[s]) >= conformations_per_state:
                confs = inverse_assignments[s][0:conformations_per_state]
            else:
                confs = inverse_assignments[s]
                print "Not enough assignments in state %s" % s
        FrameDict = {}
        for (traj, frame) in confs:
            FrameDict.setdefault(traj, []).append(frame)
        # Create a single trajectory corresponding to the frames that
        # belong to the current generator.
        if "XYZList" in cluster_traj:
            cluster_traj.pop("XYZList")
        print "Generator %i" % s,
        TrajNums = set([i[0] for i in confs])
        for i in TrajNums:
            if LoadAll:
                T = AllTraj[i][np.array(FrameDict[i])]
            else:
                T = project.LoadTraj(i)[np.array(FrameDict[i])]
            cluster_traj += T
        print " loaded %i conformations, aligning" % len(cluster_traj),
        # Prepare the trajectory, align to the generator, and reassign the coordinates.
        p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj)
        rmsd, xout = rmsd_metric.one_to_all_aligned(p_gens_traj, p_cluster_traj, s)
        p_cluster_traj["XYZList"] = xout.copy()
        # Now save the generator / cluster to a PDB / XTC file.
        outpdb = eval(formstr_pdb) % s
        outxtc = eval(formstr_xtc) % s
        this_gen_traj = p_gens_traj[s]
        print ", saving PDB to %s" % os.path.join(output_dir, outpdb),
        this_gen_traj.save_to_pdb(os.path.join(output_dir, outpdb))
        print ", saving XTC to %s" % os.path.join(output_dir, outxtc),
        p_cluster_traj.save_to_xtc(os.path.join(output_dir, outxtc))
        print ", saved"
        NowMem = float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576
        if NowMem > MaxMem:
            MaxMem = NowMem