Example 1
def load_trjs_or_features(args):
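    '''Load the data to cluster, either from a precomputed features file
    (args.features) or from raw trajectories. Returns per-trajectory
    lengths and the data (a feature array or an md.Trajectory).'''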

    if args.features:
        lengths, data = load_features(args.features, stride=args.subsample)
    else:
        assert args.trajectories
        assert len(args.trajectories) == len(args.topologies)

        targets = {
            os.path.basename(topf): "%s files" % len(trjfs)
            for topf, trjfs in zip(args.topologies, args.trajectories)
        }
        logger.info("Beginning clustering; targets:\n%s",
                    json.dumps(targets, indent=4))

        with timed("Loading trajectories took %.1f s.", logger.info):
            lengths, xyz, select_top = load_trajectories(
                args.topologies,
                args.trajectories,
                selections=args.atoms,
                stride=args.subsample,
                processes=auto_nprocs())

        logger.info("Clustering using %s atoms matching '%s'.", xyz.shape[1],
                    args.atoms)

        # md.rmsd requires an md.Trajectory object, so wrap `xyz` in
        # the topology.
        data = md.Trajectory(xyz=xyz, topology=select_top)

    return lengths, data
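
A minimal sketch of calling this helper directly; the Namespace below is hypothetical, carrying only the attributes the function reads, and the file names and atom selection are placeholders.

from argparse import Namespace

# Hypothetical arguments; file names and the selection string are placeholders.
args = Namespace(
    features=None,                              # None takes the trajectory branch
    trajectories=[["run-0.xtc", "run-1.xtc"]],  # one list of files per topology
    topologies=["protein.pdb"],
    atoms="name CA",
    subsample=10,
)

lengths, data = load_trjs_or_features(args)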
Example 2
def load_trajectories(topologies, trajectories, selections, stride, processes):
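    '''Load and subsample each trajectory set with its matching topology,
    keeping only the atoms matched by the corresponding selection string.
    Returns per-trajectory lengths, the loaded coordinates, and the
    selected-atom topology.'''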

    for top, selection in zip(topologies, selections):
        sentinel_trj = md.load(top)
        try:
            # No-op select; fails fast if the selection string doesn't
            # match the topology.
            sentinel_trj.top.select(selection)
        except Exception:
            raise exception.ImproperlyConfigured(
                ("The provided selection '{s}' didn't match the topology "
                 "file, {t}").format(s=selection, t=top))

    flat_trjs = []
    configs = []
    n_inds = None

    for topfile, trjset, selection in zip(topologies, trajectories,
                                          selections):
        top = md.load(topfile).top
        indices = top.select(selection)

        if n_inds is not None:
            if n_inds != len(indices):
                raise exception.ImproperlyConfigured(
                    ("Selection on topology %s selected %s atoms, but "
                     "other selections selected %s atoms.") %
                    (topfile, len(indices), n_inds))
        n_inds = len(indices)

        for trj in trjset:
            flat_trjs.append(trj)
            configs.append({
                'top': top,
                'stride': stride,
                'atom_indices': indices,
            })

    logger.info(
        "Loading %s trajectories with %s atoms using %s processes "
        "(subsampling %s)", len(flat_trjs), n_inds, processes, stride)
    assert n_inds > 0, "No atoms selected for clustering"

    with timed("Loading took %.1f sec", logger.info):
        lengths, xyz = mpi.io.load_trajectory_as_striped(
            flat_trjs, args=configs, processes=auto_nprocs())

    with timed("Turned over array in %.2f min", logger.info):
        tmp_xyz = xyz.copy()
        del xyz
        xyz = tmp_xyz

    logger.info("Loaded %s frames.", len(xyz))

    return lengths, xyz, top.subset(indices)
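
A minimal sketch of a direct call; file paths and the atom selection are placeholders, and the MPI-backed loader this function wraps is assumed to be available.

# Hypothetical inputs; file paths and the atom selection are placeholders.
lengths, xyz, select_top = load_trajectories(
    topologies=["protein.pdb"],
    trajectories=[["run-0.xtc", "run-1.xtc"]],
    selections=["name CA"],
    stride=10,
    processes=4,
)

print(lengths)      # frames kept from each trajectory file
print(xyz.shape)    # typically (n_frames, n_selected_atoms, 3)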
Example 3
def process_command_line(argv):
    '''Parse the command line and do a first pass at processing the
    arguments into a format appropriate for the rest of the script.'''

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        "--assignments", required=True,
        help="File containing assignments to states.")
    parser.add_argument(
        "--n-eigenvalues", default=5, type=int,
        help="Number of eigenvalues to compute for each lag time.")
    parser.add_argument(
        "--lag-times",  default="5:100:2",
        help="List of lagtimes (in frames) to compute eigenspectra for. "
             "Format is min:max:step.")
    parser.add_argument(
        "--symmetrization", default="transpose",
        choices=['transpose', 'row_normalize', 'prior_counts'],
        help="The method to use to fit transition probabilities from "
             "the transition counts matrix.")
    parser.add_argument(
        "--trj-ids", default=None,
        help="Computed the implied timescales for only the given "
             "trajectory ids. This is useful for handling assignments "
             "for shared state space clusterings.")
    parser.add_argument(
        "--processes", default=max(1, auto_nprocs()/4), type=int,
        help="Number of processes to use. Because eigenvector "
             "decompositions are thread-parallelized, this should "
             "usually be several times smaller than the number of "
             "cores availiable on your machine.")
    parser.add_argument(
        "--trim", default=False, action="store_true",
        help="Turn ergodic trimming on.")

    parser.add_argument(
        "--timestep", default=None, type=float,
        help='A conversion between frames and nanoseconds (i.e. frames '
             'per nanosecond) to scale the axes to physical units '
             '(rather than frames).')
    parser.add_argument(
        "--infer-timestep", default=None,
        help="An example trajectory from which to infer the conversion "
             "from frame to nanoseconds.")

    parser.add_argument(
        "--plot", default=None,
        help="Path for the implied timescales plot.")
    parser.add_argument(
        "--logscale", action='store_true',
        help="Flag to output y-axis log scale plot.")

    args = parser.parse_args(argv[1:])

    args.lag_times = range(*map(int, args.lag_times.split(':')))

    if args.trj_ids is not None:
        args.trj_ids = slice(*map(int, args.trj_ids.split(':')))

    if args.symmetrization == 'prior_counts':
        args.symmetrization = prior_counts
    else:
        args.symmetrization = getattr(builders, args.symmetrization)

    return args
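
A minimal sketch of driving this parser programmatically; the script name and file paths are placeholders.

# Hypothetical argv; the script name and paths are placeholders.
args = process_command_line([
    "implied_timescales.py",
    "--assignments", "assignments.h5",
    "--lag-times", "10:200:10",
    "--symmetrization", "transpose",
    "--plot", "timescales.png",
])

print(list(args.lag_times)[:3])   # [10, 20, 30]
print(args.symmetrization)        # callable looked up from builders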
Example 4
def process_command_line(argv):
    '''Parse the command line and do a first pass at processing the
    arguments into a format appropriate for the rest of the script.'''

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Compute CARDS matricies for a set of trajectories "
        "and save all matrices and dihedral mappings.\n \n"
        "Please cite the following papers if you use CARDS with enspara:\n"
        "[1] Singh, S. and Bowman, G.R.\n"
        "    Journal of Chemical Theory and Computation\n"
        "    2017 13 (4), 1509-1517\n"
        "    DOI: 10.1021/acs.jctc.6b01181\n"
        "\n"
        "[2] Porter,J.R.,  Zimmerman, M.I., and Bowman G.R.\n"
        "    bioRxiv 431072; doi: https://doi.org/10.1101/431072\n")

    # INPUTS
    input_args = parser.add_argument_group("Input Settings")
    #input_data_group = parser.add_mutually_exclusive_group(required=True)
    input_args.add_argument(
        '--trajectories',
        required=True,
        nargs="+",
        action='append',
        help="List of paths to aligned trajectory files to cluster. "
        "All file types that MDTraj supports are supported here.")
    input_args.add_argument('--topology',
                            required=True,
                            action='append',
                            help="The topology file for the trajectories.")

    # PARAMETERS
    cards_args = parser.add_argument_group("CARDS Settings")
    cards_args.add_argument(
        '--buffer-size',
        default=15,
        type=int,
        help="Size of buffer zone between rotameric states, in degrees.")
    cards_args.add_argument("--processes",
                            default=max(1,
                                        auto_nprocs() / 4),
                            type=int,
                            help="Number of processes to use.")

    # OUTPUT
    output_args = parser.add_argument_group("Output Settings")
    output_args.add_argument(
        '--matrices',
        required=True,
        action=readable_dir,
        help="The folder location to write the four CARDS matrices (as pickle)."
    )
    output_args.add_argument(
        '--indices',
        required=True,
        action=readable_dir,
        help="The location to write the dihedral indices file (as CSV).")

    args = parser.parse_args(argv[1:])

    # CARDS FEATURES
    if not (0 < args.buffer_size < 360):
        raise exception.ImproperlyConfigured(
            "The given buffer size (%s) is not possible." % args.buffer_size)

    return args
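
A minimal sketch of parsing a CARDS command line; paths are placeholders, and the output directories are assumed to exist since the readable_dir action presumably validates them. Because --trajectories uses nargs='+' with action='append', args.trajectories comes back as a list of lists.

# Hypothetical argv; file and directory paths are placeholders.
args = process_command_line([
    "cards.py",
    "--trajectories", "run-0.xtc", "run-1.xtc",
    "--topology", "protein.pdb",
    "--buffer-size", "15",
    "--matrices", "cards_output",
    "--indices", "cards_output",
])

print(args.trajectories)   # [['run-0.xtc', 'run-1.xtc']]
print(args.buffer_size)    # 15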