Example #1
    def test_a_ConvertDataToHDF(self):
        os.chdir(WorkingDir)
        shutil.copy(PDBFn, "./")
        # def run(projectfn, PDBfn, InputDir, source, mingen, stride, rmsd_cutoff, parallel='None'):
        ConvertDataToHDF.run(ProjectFn, PDBFn, TutorialDir + "/XTC", "file", 0, 1, None)
        P1 = Project.load_from(ProjectFn)

        r_P1 = Project.load_from(os.path.abspath(os.path.join('..', ReferenceDir, ProjectFn)))

        eq_(P1.n_trajs, r_P1.n_trajs)
        npt.assert_equal(P1.traj_lengths, r_P1.traj_lengths)
        eq_(os.path.basename(P1.traj_filename(0)), os.path.basename(r_P1.traj_filename(0)))
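The eq_ and npt helpers used in this test are not defined in the snippet; it presumably relies on imports like the following (an assumption, since the example's module header is not shown):

import numpy.testing as npt
from nose.tools import eq_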
Example #3
def run(projectfn, conf_filename, input_dir, source, min_length, stride, rmsd_cutoff, atom_indices, iext):

    # check if we are doing an update or a fresh run
    # if os.path.exists(projectfn):
    #     logger.info("Found project info file encoding previous work, running in update mode...")
    #     update = True
    # else:
    #     update = False
    #
    # logger.info("Looking for %s style data in %s", source, input_dir)
    # if update:
    #     raise NotImplementedError("Ack! Update mode is not yet ready yet.")

    # if the source is fah, we'll use some special FaH specific loading functions
    # to (1) try to recover in case of errors and (2) load the specific directory
    # hierarchy of FaH (RUN/CLONE/GEN/frame.xtc)
    if os.path.exists(projectfn):
        project = Project.load_from(projectfn)
        logger.warn(
            "%s exists, will modify it and update the trajectories in %s",
            projectfn, '/'.join(project._traj_paths[0].split('/')[:-1]))
    else:
        project = None

    if source.startswith('file'):
        pb = ProjectBuilder(
            input_dir, input_traj_ext=iext, conf_filename=conf_filename,
            stride=stride, project=project, atom_indices=atom_indices)
    elif source == 'fah':
        pb = FahProjectBuilder(
            input_dir, input_traj_ext=iext, conf_filename=conf_filename,
            stride=stride, project=project, atom_indices=atom_indices)
    else:
        raise ValueError("Invalid argument for source: %s" % source)

    # check that trajectories do not go farther than a certain RMSD
    # from the PDB. Useful to check for blowing up or other numerical
    # instabilities
    if rmsd_cutoff is not None:
        # TODO: this is going to use ALL of the atom_indices, including hydrogen. This is
        # probably not the desired functionality
        # KAB: Apparently needed to use correctly subsetted atom_indices here to avoid an error
        validator = validators.RMSDExplosionValidator(
            conf_filename, max_rmsd=rmsd_cutoff, atom_indices=atom_indices)
        pb.add_validator(validator)

    # Only accept trajectories with more snapshots than min_length.
    if min_length > 0:
        validator = validators.MinLengthValidator(min_length)
        pb.add_validator(validator)

    # everyone wants to be centered
    pb.add_validator(validators.TrajCenterer())

    pb.get_project().save(projectfn)
    assert os.path.exists(projectfn), '%s does not exist' % projectfn
    logger.info("Finished data conversion successfully.")
    logger.info("Generated: %s, Trajectories/", projectfn)

    return
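A hedged usage sketch for run() above; every path and argument value here is illustrative rather than taken from a real project:

# Convert file-style XTC data into an MSMBuilder project, keeping every
# trajectory (min_length=0) and every frame (stride=1), and skipping the
# RMSD explosion check (rmsd_cutoff=None).
run('ProjectInfo.yaml', 'native.pdb', './XTC', 'file',
    min_length=0, stride=1, rmsd_cutoff=None, atom_indices=None, iext='.xtc')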
Example #4
def main():
    global data2d
    global As
    # First I need to turn the assignments matrix into a 1D list of assignments
    sys.stdout = os.fdopen(sys.stdout.fileno(),'w',0)
    print "Reading in Assignments... from %s " % options.assFN
    As = io.loadh(options.assFN)['arr_0'].astype(int)
    print "Reading in data... from %s " % options.dataFN
    try: 
        f = io.loadh( options.dataFN )
        try:
            data2d = f['arr_0']
        except:
            data2d = f['Data']
    except:
        data = load(options.dataFN)
        proj = Project.load_from( options.projFN )
        data2d = msmTools.reshapeRawData( data, proj )

    print "Calculating averages for:"

    pool = mp.Pool(options.procs)
    clusters = range( As.max() + 1)
    result = pool.map_async(calcAvg,clusters[:])
    result.wait()
    sol = result.get()
    sol = array(sol)
    savetxt(options.outFN, sol)

    return
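Several of these examples fall back from the 'arr_0' key to the 'Data' key when reading an assignments file. A small helper capturing that idiom (a sketch; the function name is hypothetical):

from mdtraj import io

def load_assignments(path):
    # Assignment files in these examples store their array under either
    # 'arr_0' (numpy savez convention) or the legacy 'Data' key.
    try:
        return io.loadh(path, 'arr_0')
    except KeyError:
        return io.loadh(path, 'Data')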
Example #5
def main():
    """Parse command line inputs, load up files, then call run() and save() to do
    the real work"""
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    # load...
    # project
    project = Project.load_from(args.project)

    # assignments
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    # states
    if -1 in args.states:
        states = np.unique(assignments[np.where(assignments != -1)])
        logger.info('Yanking from all %d states', len(states))
    else:
        # ensure that the states are sorted, and that they're unique -- you
        # can only request each state once
        states = np.unique(args.states)
        logger.info("Yanking from the following states: %s", states)

    # extract the conformations using np.random for the randomness
    confs_by_state = project.get_random_confs_from_states(
        assignments, states=states, num_confs=args.conformations_per_state,
        replacement=args.replacement)

    # save the conformations to disk, in the requested style
    save(confs_by_state=confs_by_state, states=states, style=args.style,
         format=args.format, outdir=args.output_dir)
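The get_random_confs_from_states call recurs throughout these examples. A minimal sketch of how its return value is typically indexed, assuming a loaded project and assignments array as above:

# One entry per requested state; each entry is trajectory-like, with an
# 'XYZList' array holding one frame per sampled conformation.
confs_by_state = project.get_random_confs_from_states(
    assignments, states=[0, 1], num_confs=2)
first_frame_of_state0 = confs_by_state[0]['XYZList'][0]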
Example #6
    def save(self):
        "Save the trajs as a n MSMBuilder project"

        traj_dir = pjoin(self.project_dir, 'Trajectories')
        if not os.path.exists(traj_dir):
            os.makedirs(traj_dir)

        t = Trajectory.load_trajectory_file(self.conf_filename)

        traj_paths = []
        for i, xyz in enumerate(self.trajectories):
            t['IndexList'] = None  # bug in msmbuilder
            t['XYZList'] = xyz

            traj_paths.append(pjoin(traj_dir, 'trj%d.lh5' % i))
            t.save(traj_paths[-1])

        p = Project(
            {
                'conf_filename': os.path.abspath(self.conf_filename),
                'traj_lengths': self.n_frames * np.ones(self.n_trajs),
                'traj_paths': [os.path.abspath(e) for e in traj_paths],
                'traj_converted_from': [[] for i in range(self.n_trajs)],
                'traj_errors': [None for i in range(self.n_trajs)],
            },
            project_dir=self.project_dir,
            validate=True)
        p.save(pjoin(self.project_dir, 'Project.yaml'))

        # just check again
        p = Project.load_from(pjoin(self.project_dir, 'Project.yaml'))
        p._validate()
        assert np.all(
            (p.load_traj(0)['XYZList'] - self.trajectories[0])**2 < 1e-6)
Example #7
def main():
    """Parse command line inputs, load up files, and build a movie."""

    parser = arglib.ArgumentParser(description="""
Create an MSM movie by sampling a sequence of states and sampling a 
random conformation from each state in the sequence.  
""")
    parser.add_argument('project')
    parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
    parser.add_argument('tprob', default='Data/tProb.mtx')
    parser.add_argument('num_steps')
    parser.add_argument('starting_state', type=int, help='''Which state to start trajectory from.''')
    parser.add_argument('output', default='sample_traj.pdb', help="""The filename of your output trajectory.  The filetype suffix will be used to select the output file format.""")
    args = parser.parse_args()

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
    
    num_steps = int(args.num_steps)
    starting_state = int(args.starting_state)
    
    project = Project.load_from(args.project)    
    T = scipy.io.mmread(args.tprob).tocsr()

    state_traj = msm_analysis.sample(T, starting_state, num_steps)
    sampled_traj = project.get_random_confs_from_states(assignments, state_traj, 1)
    traj = sampled_traj[0]
    traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj])
    traj.save(args.output)
Example #8
def main(file):
    ass=io.loadh(file)
    dir=os.path.dirname(file)
    base=os.path.basename(file)
    newdir='%s/subsample' % dir
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    p=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    data=dict()
    totals=dict()
    iterations=int(ass['arr_0'].shape[1]/10.0)
    start=max(p.traj_lengths)
    for iter in range(0, iterations):
        new=start-10
        if new < 10:
            break
        totals[new]=0
        data[new]=-numpy.ones((ass['arr_0'].shape[0], new), dtype=int)
        for i in range(0, ass['arr_0'].shape[0]):
            data[new][i]=ass['arr_0'][i][:new]
            frames=numpy.where(data[new][i]!=-1)[0]
            totals[new]+=len(frames)
        start=new

    ohandle=open('%s/times.h5' % (newdir), 'w')
    for key in sorted(data.keys()):
        print data[key].shape
        print "total time is %s" % totals[key]
        ohandle.write('%s\t%s\t%s\n' % (data[key].shape[0], data[key].shape[1], totals[key]))
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('assignments', default='Macro4/MacroAssignments.h5', help='Path to an assignments file. (default=Macro4/MacroAssignments.h5)')
    parser.add_argument('--project', default='ProjectInfo.yaml', help='Path to ProjectInfo.yaml file. (default=ProjectInfo.yaml)')
    args = parser.parse_args()

    project = Project.load_from(args.project)
    t = reduce(operator.add, (project.load_traj(i) for i in range(project.n_trajs)))

    phi_angles = md.compute_dihedrals(t, [PHI_INDICES]) * 180.0 / np.pi
    psi_angles = md.compute_dihedrals(t, [PSI_INDICES]) * 180.0 / np.pi
    state_index = np.hstack(io.loadh(args.assignments)['arr_0'])

    for i in np.unique(state_index):
        pp.plot(phi_angles[np.where(state_index == i)],
                psi_angles[np.where(state_index == i)],
                'x', label='State %d' % i)


    pp.title("Alanine Dipeptide Macrostates")
    pp.xlabel(r"$\phi$")
    pp.ylabel(r"$\psi$")
    annotate()

    pp.legend(loc=1, labelspacing=0.075, prop={'size': 8.0}, scatterpoints=1,
              markerscale=0.5, numpoints=1)
    pp.xlim([-180, 180])
    pp.ylim([-180, 180])
    pp.show()
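PHI_INDICES and PSI_INDICES are assumed to be module-level lists of four atom indices defining the two backbone dihedrals; the values below are hypothetical ones for alanine dipeptide:

PHI_INDICES = [4, 6, 8, 14]   # hypothetical C-N-CA-C quartet
PSI_INDICES = [6, 8, 14, 16]  # hypothetical N-CA-C-N quartet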
Example #11
def main(modeldir, genfile,  type):
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    data=dict()
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd']=data['rmsd'][frames]
    com=numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'])
    data['com']=data['com'][frames]

    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)
    paths=io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))
    
    for p in range(0, 20):
        movie=project.empty_traj()
        path=paths['Paths'][p]
        flux=paths['fluxes'][p]/paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        frames=numpy.where(path!=-1)[0]
        path=numpy.array(path[frames], dtype=int)
        for (n, state) in enumerate(path):
            t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 20)
            if n==0:
                movie['XYZList']=t[0]['XYZList']
            else:
                movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
        movie.save_to_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p))
Example #12
def main(modeldir, genfile, type, write=False):
    proj=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    data=dict()
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd']=data['rmsd'][frames]
    com=numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'][frames])

    residues=['F36', 'H87', 'I56', 'I90', 'W59', 'Y82', 'hydrophob_dist', 'oxos_dist']
    loops=['loop1', 'loop2', 'loop3']
    for loop in loops:
        data[loop]=numpy.loadtxt('%s.%srmsd.dat' % (genfile.split('.lh5')[0], loop))
        data[loop]=data[loop][frames]
    for res in residues:
        file='%s_%spair.dat' % (genfile.split('.lh5')[0], res)
        if os.path.exists(file):
            data[res]=numpy.loadtxt(file)
            data[res]=data[res][frames]
    angles=['phi', 'omega']
    for ang in angles:
        file='%s_%s.dat' % (genfile.split('.lh5')[0], ang)
        if os.path.exists(file):
            data[ang]=numpy.loadtxt(file)
            data[ang]=data[ang][frames]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound=numpy.loadtxt('%s/tpt-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)

    Tdense=T.todense()
    Tdata=dict()
    for i in unbound:
        for j in unbound:
            if Tdense[i,j]!=0:
                if i not in Tdata.keys():
                    Tdata[i]=[]
                Tdata[i].append(j)
    #print Tdata
    cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive
    Q=tpt.calculate_committors(unbound, bound, T)
    ohandle=open('%s/commitor_states.txt' % modeldir, 'w')
    for i in range(0,len(Q)):
        if Q[i]>0.40 and Q[i]<0.6:
            ohandle.write('%s\n' % i)
            #t=project.get_random_confs_from_states(ass['arr_0'], [int(i),], 20)
            #t[0].save_to_xtc('%s/commottor_state%s.xtc' % (modeldir, i))
    if write==True:
        for op in sorted(data.keys()):
            pylab.figure()
            pylab.scatter(data['com'], data[op],  c=Q, cmap=cm, alpha=0.7, s=[map_size(i) for i in Q])
            pylab.xlabel('L RMSD')
            pylab.ylabel(op)
            pylab.colorbar()
        pylab.show()
Example #13
def main(assfile, lag, nproc):
    lag=int(lag)
    nproc=int(nproc)
    Assignments=io.loadh(assfile)
    num=int(assfile.split('Assignments_sub')[1].split('.h5')[0])
    dir=os.path.dirname(assfile)
    newdir='%s/boot-sub%s' % (dir, num)
    ref_sub=numpy.loadtxt('%s/times.h5' % dir, usecols=(1,))
    ref_total=numpy.loadtxt('%s/times.h5' % dir, usecols=(2,))
    times=dict()
    for (i,j) in zip(ref_sub, ref_total):
        times[i]=j

    proj=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom=int(times[num])
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    if 'Data' in Assignments.keys():
        Assignments=Assignments['Data']
    else:
        Assignments=Assignments['arr_0']
    print Assignments.shape
    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag), sliding_window=True)
    Counts=Counts.todense()
    Counts=Counts*(1.0/lag)
    T=numpy.array(Counts)
    frames=numpy.where(T==0)
    T[frames]=1
    Popsample=dict()
    iteration=0
    total_iteration=100/nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain=100 % nproc
    else:
        remain=False
    print "iterating thru tCount samples"
    count=0
    while iteration < 100:
        if count*nproc > 100:
            nproc=remain
        print "sampling iteration %s" % iteration
        Tfresh=T.copy()
        input = zip([Tfresh]*nproc, [multinom]*nproc, range(0, NumStates))
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        pool.terminate()
        for c_matrix in all:
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), c_matrix)
            #rev_counts, t_matrix, Populations, Mapping=x
            #scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
            #numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
            #numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration+=1
        count+=1
        print "dont with iteration %s" % iteration*nproc
Example #14
def test_rg_1():
    project = Project.load_from(os.path.join(fixtures_dir(), 'ProjectInfo.h5'))
    traj = project.load_traj(0)
    xyzlist = traj['XYZList']

    a = rgcalc.calculate_rg(xyzlist)
    b = reference_rg(xyzlist)

    npt.assert_array_almost_equal(a, b)
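reference_rg is not shown; a plain-NumPy, mass-unweighted radius of gyration that a test like this could compare against is sketched below:

import numpy as np

def reference_rg(xyzlist):
    # xyzlist has shape (n_frames, n_atoms, 3): center each frame on its
    # centroid, then take the RMS distance of the atoms from that centroid.
    centered = xyzlist - xyzlist.mean(axis=1, keepdims=True)
    return np.sqrt((centered ** 2).sum(axis=2).mean(axis=1))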
Example #16
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")
    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)
    
    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path, distances_path)

    logger.info('All Done!')
Example #18
def load_trajectories(projectfn, stride):
    project = Project.load_from(projectfn)

    list_of_trajs = []
    for i in xrange(project.n_trajs):
        # note, LoadTraj is only using the fast strided loading for
        # HDF5 formatted trajs
        traj = project.load_traj(i, stride=stride)
        list_of_trajs.append(traj)

    return list_of_trajs
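A hedged usage sketch for load_trajectories(); the path and stride are illustrative:

trajs = load_trajectories('ProjectInfo.yaml', stride=10)
n_loaded = len(trajs)  # one trajectory object per trajectory in the project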
Example #19
def main(assfile, lag, nproc):
    lag=int(lag)
    nproc=int(nproc)
    Assignments=io.loadh(assfile)
    dir=os.path.dirname(assfile)
    newdir='%s/sample-counts' % dir
    proj=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom=sum(proj.traj_lengths)
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    if 'Data' in Assignments.keys():
        Assignments=Assignments['Data']
    else:
        Assignments=Assignments['arr_0']
    print Assignments.shape
    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag), sliding_window=True)
    Counts=Counts.todense()
    Counts=Counts*(1.0/lag)
    T=numpy.array(Counts)
    frames=numpy.where(T==0)
    T[frames]=1
    Popsample=dict()
    iteration=0
    total_iteration=100/nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain=100 % nproc
    else:
        remain=False
    print "iterating thru tCount samples"
    count=0
    while iteration < 100:
        if count*nproc > 100:
            nproc=remain
        print "sampling iteration %s" % iteration
        Tfresh=T.copy()
        counts=range(0, nproc)
        input = zip([Tfresh]*nproc, [multinom]*nproc, [NumStates]*nproc, counts)
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        print "computed resampled matrices"
        pool.terminate()
        for count_matrix in all:
            #rev_counts, t_matrix, Populations, Mapping=x
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), count_matrix)
           # scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
           # numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
           # numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration+=1
        count+=1
        print "dont with iteration %s" % iteration*nproc
Example #21
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    SASA = run(project, atom_indices, args.traj_fn)
    io.saveh(args.output, SASA)
Example #23
def main(modeldir, start, type):
    start=int(start)
    data=dict()
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    files=glob.glob('%s/fkbp*xtal.pdb' % modeldir.split('Data')[0])
    pdb=files[0]
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T=mmread('%s/tProb.mtx' % modeldir)
    startstate=unbound[start]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    steps=100000
    print "on start state %s" % startstate
    if os.path.exists('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)):
        print "loading from states"
        traj=numpy.loadtxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate))
    else:
        traj=msm_analysis.sample(T, int(startstate),int(steps))
        numpy.savetxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate), traj)
    print "checking for chkpt file"
    checkfile=glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if len(checkfile) > 0:
        movie=Trajectory.load_from_xtc(checkfile[0], PDBFilename=pdb)
        n=int(checkfile[0].split('xtc.state')[1].split('chkpt')[0])
        os.system('mv %s %s.chkpt.cp' % (checkfile[0], checkfile[0].split('.xtc')[0]))
        print "checkpointing at state index %s out of %s" % (n, len(traj))
        checkfile=checkfile[0]
        restart=True
    else:
        restart=False
        n=0
        movie=project.empty_traj()
    while n < len(traj):
        print "on state %s" % n
        state=int(traj[n])
        t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 10)
        if n==0:
            movie['XYZList']=t[0]['XYZList']
            n+=1
            continue
        elif n % 100==0:
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            if restart==True:
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n))
            checkfile='%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            n+=1
            continue
        elif n!=0:
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            n+=1
            continue
    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
Example #24
    def test_g_GetRandomConfs(self):
        P1 = Project.load_from(ProjectFn)
        Assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

        # make a predictable stream of random numbers by seeding the RNG with 42
        random_source = np.random.RandomState(42)
        randomconfs = GetRandomConfs.run(P1, Assignments,
                                         NumRandomConformations, random_source)

        reference = Trajectory.load_trajectory_file(
            os.path.join(ReferenceDir, "2RandomConfs.lh5"))
        self.assert_trajectories_equal(reference, randomconfs)
Example #25
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    # arglib.die_if_path_exists(args.output_dir)
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    project = Project.load_from(args.project)
    gens = md.load(args.generators)

    if isinstance(metric, metrics.RMSD):
        # this is really bad design, and we're going to fix it soon in
        # MSMBuilder3, but here's the deal. When Cluster.py loads up the
        # trajectories (Cluster.py:load_trajectories()), it only loads the
        # required indices for RMSD. This means that when it saves the Gens
        # file, that file contains only a subset of the atoms. So when
        # we run *this* script, we need to perform a restricted load of the
        # the trajectories on disk, but we need to NOT perform a restricted
        # load of the gens.h5 file. (By restricted load, I mean loading
        # only a subset of the data in the file)
        if gens.n_atoms != len(metric.atomindices):
            msg = ('Using RMSD clustering/assignment, this script expects '
                   'that the Cluster.py script saves a generators file that '
                   'only contains the indices of the atoms of interest, and '
                   'not any of the superfluous degrees of freedom that were '
                   'not used for clustering. But you supplied %d cluster '
                   'centers each containing %d atoms. Your atom indices file '
                   'on the other hand contains %d atoms') \
                % (gens.xyz.shape[0], gens.xyz.shape[1],
                   len(metric.atomindices))
            raise ValueError(msg)

        # now that we're telling the assign function only to load up a
        # subset of the atoms, an the generator is already only a subset,
        # the actual RMSD object needs to, from ITS perspective, operate on
        # every degree of freedom. So it shouldn't be aware of any special
        # atom_indices
        atom_indices = metric.atomindices
        metric.atomindices = None
        # this runs assignment and prints them to disk
        assign_with_checkpoint(metric,
                               project,
                               gens,
                               assignments_path,
                               distances_path,
                               atom_indices_to_load=atom_indices)
    else:
        assign_with_checkpoint(metric, project, gens, assignments_path,
                               distances_path)

    logger.info('All Done!')
Example #26
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    #arglib.die_if_path_exists(args.output_dir)
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)
    
    if isinstance(metric, metrics.RMSD):
        # this is really bad design, and we're going to fix it soon in
        # MSMBuilder3, but here's the deal. When Cluster.py loads up the
        # trajectories (Cluster.py:load_trajectories()), it only loads the
        # required indices for RMSD. This means that when it saves the Gens
        # file, that file contains only a subset of the atoms. So when
        # we run *this* script, we need to perform a restricted load of the
        # the trajectories on disk, but we need to NOT perform a restricted
        # load of the gens.lh5 file. (By restricted load, I mean loading
        # only a subset of the data in the file)
        if gens['XYZList'].shape[1] != len(metric.atomindices):
            msg = ('Using RMSD clustering/assignment, this script expects '
                   'that the Cluster.py script saves a generators file that '
                   'only contains the indices of the atoms of interest, and '
                   'not any of the superfluous degrees of freedom that were '
                   'not used for clustering. But you supplied %d cluster '
                   'centers each containing %d atoms. Your atom indices file '
                   'on the other hand contains %d atoms') \
                    % (gens['XYZList'].shape[0], gens['XYZList'].shape[1],
                       len(metric.atomindices))
            raise ValueError(msg)


        # now that we're telling the assign function only to load up a
        # subset of the atoms, an the generator is already only a subset,
        # the actual RMSD object needs to, from ITS perspective, operate on
        # every degree of freedom. So it shouldn't be aware of any special
        # atom_indices
        atom_indices = metric.atomindices
        metric.atomindices = None
        # this runs assignment and prints them to disk
        assign_with_checkpoint(metric, project, gens, assignments_path,
            distances_path, atom_indices_to_load=atom_indices)
    else:
        assign_with_checkpoint(metric, project, gens, assignments_path,
            distances_path)

    logger.info('All Done!')
Example #27
def entry_point():
    args = parser.parse_args()
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance')
        sys.exit(1)

    project = Project.load_from(args.project)
    assignments = main(
        k, d, args.hierarchical_clustering_zmatrix, args.stride, project)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #28
def load_trajectories(projectfn, stride, atom_indices):
    project = Project.load_from(projectfn)

    list_of_trajs = []
    for i in xrange(project.n_trajs):
        # note, LoadTraj is only using the fast strided loading for
        # HDF5 formatted trajs
        traj = project.load_traj(i, stride=stride, atom_indices=atom_indices)
        
        if atom_indices is not None:
            assert len(atom_indices) == traj['XYZList'].shape[1]
        
        list_of_trajs.append(traj)

    return list_of_trajs
Example #30
def entry_point():
    args, metric = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = md.load(args.pdb)
    if args.traj_fn.lower() == 'all':
        traj_fn = None
    else:
        traj_fn = args.traj_fn

    distances = run(project, pdb, metric, traj_fn)

    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
Example #32
def entry_point():
    args, prep_metric = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    min_length = int(float(args.min_length))
    # need to convert to float first because int can't
    # convert a string that is '1E3' for example...weird.

    tica_obj = run(
        prep_metric, project, args.delta_time, atom_indices=atom_indices,
        output=args.output, min_length=min_length, stride=args.stride)
Example #33
def load(filename):
    # delay these imports, since this module is loaded in a bunch
    # of places but not necessarily used
    import scipy.io
    from msmbuilder import Project

    # the filename extension
    ext = os.path.splitext(filename)[1]

    # load trajectories
    if ext != '.h5' and ext in md._FormatRegistry.loaders.keys():
        val = md.load(filename)

    # load flat text files
    elif 'AtomIndices.dat' in filename:
        # try loading AtomIndices first, because the default for loadtxt
        # is to use floats
        val = np.loadtxt(filename, dtype=np.int)
    elif ext in ['.dat']:
        # try loading general .dats with floats
        val = np.loadtxt(filename)

    # short circuit opening ProjectInfo
    elif ('ProjectInfo.yaml'
          in filename) or ('ProjectInfo.h5' in filename) or (re.search(
              'ProjectInfo.*\.yaml', filename)):
        val = Project.load_from(filename)

    # load with serializer files that end with .h5, .hdf or .h5.distances
    elif ext in ['.h5', '.hdf']:
        val = io.loadh(filename, deferred=False)
    elif filename.endswith('.h5.distances'):
        val = io.loadh(filename, deferred=False)

    # load matricies
    elif ext in ['.mtx']:
        val = scipy.io.mmread(filename)

    else:
        raise TypeError(
            "I could not infer how to load this file. You "
            "can either request load=False, or perhaps add more logic to "
            "the load heuristics in this class: %s" % filename)

    return val
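A hedged sketch of what the dispatcher above returns for a few common inputs (paths illustrative):

project = load('ProjectInfo.yaml')   # -> msmbuilder Project object
tprob = load('Data/tProb.mtx')       # -> sparse matrix via scipy.io.mmread
indices = load('AtomIndices.dat')    # -> integer numpy array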
Example #34
def main(modeldir):
    proj=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    data=dict()
    data['dist']=numpy.loadtxt('%s/prot_lig_distance.dat' % modeldir, usecols=(1,))
    data['rmsd']=numpy.loadtxt('%s/Gens.rmsd.dat' % modeldir, usecols=(2,))
    com=numpy.loadtxt('%s/Gens.vmd_com.dat' % modeldir, usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'])
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    pylab.scatter(data['com'][frames], data['rmsd'][frames])
    pylab.scatter([refcom,], [0,], c='k', marker='x', s=100)
    pylab.xlabel('P-L COM')
    pylab.ylabel('P-L RMSD')
    pylab.show()
Example #35
def main(modeldir):
    data=dict()
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)

    if not os.path.exists('%s/adaptive-states/' % modeldir):
        os.mkdir('%s/adaptive-states/' % modeldir)
    for state in sorted(set(ass['arr_0'].flatten())):
        if state!=-1:
            t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 5)
            for i in range(0, 5):
                print state, i
                (a, b, c) =t[0]['XYZList'].shape
                movie=project.empty_traj()
                movie['XYZList']=numpy.zeros((1, b, c), dtype=numpy.float32)
                movie['XYZList'][0]=t[0]['XYZList'][i]
                movie.save_to_pdb('%s/adaptive-states/state%s-%s.pdb' % (modeldir, int(state), i))
Example #36
def plot_raw_trajectory(i):
    from rmagic import r
    p = Project.load_from('project/Project.yaml')
    traj = p.load_traj(i)['XYZList']
    
    r.push(x=traj[:,0], y=traj[:,1], ts=np.arange(p.traj_lengths[i]))
    r.push(bounds=[-5,5])
    r.eval('''
    library(ggplot2)
    p = ggplot(data=data.frame(x=x, y=y, ts=ts), aes(x=x, y=y, color=ts))
    p = p + geom_path()
    #p = p + geom_point()
    p = p + scale_x_continuous(limits=bounds)
    p = p + scale_y_continuous(limits=bounds)
    p = p + scale_color_continuous(low='black', high='lightblue')
    p = p + ggtitle('One of the trajectories')
    ggsave('plot.png')
    system('open plot.png')
    ''')
Example #38
def main():
    parser = arglib.ArgumentParser(
        description="""
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use 
AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to a 
microstate in a rectangular array of ints. Assignments.h5.distances is an 
array of real numbers of the same dimension containing the distance (according 
to whichever metric you choose) from each frame to to the medoid of the 
microstate it is assigned to.""",
        get_metric=True
    )  #, formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('project')
    parser.add_argument(dest='generators',
                        help='''Output trajectory file containing
        the structures of each of the cluster centers. Note that for hierarchical clustering
        methods, this file will not be produced.''',
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')

    args, metric = parser.parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")
    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path)

    logger.info('All Done!')
Example #40
def main():
    """Parse command line inputs, load up files, and build a movie."""
    args = parser.parse_args()
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    num_steps = int(args.num_steps)
    starting_state = int(args.starting_state)

    project = Project.load_from(args.project)
    T = scipy.io.mmread(args.tprob).tocsr()

    state_traj = msm_analysis.sample(T, starting_state, num_steps)
    sampled_traj = project.get_random_confs_from_states(
        assignments, state_traj, 1)
    traj = sampled_traj[0]
    traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj])
    traj.save(args.output)
Example #41
def entry_point():
    """Parse command line inputs, load up files, and build a movie."""
    args = parser.parse_args()
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    num_steps = int(args.num_steps)
    starting_state = int(args.starting_state)

    project = Project.load_from(args.project)
    T = scipy.io.mmread(args.tprob).tocsr()

    state_traj = msm_analysis.sample(T, starting_state, num_steps)
    sampled_traj = project.get_random_confs_from_states(
        assignments, state_traj, 1)
    traj = sampled_traj[0]
    traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj])
    traj.save(args.output)
Example #43
def entry_point():
    """Parse command line inputs, load up files, then call run() and save() to do
    the real work"""
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    # load...
    # project
    project = Project.load_from(args.project)

    # assignments
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    # states
    if -1 in args.states:
        states = np.unique(assignments[np.where(assignments != -1)])
        logger.info('Yanking from all %d states', len(states))
    else:
        # ensure that the states are sorted, and that they're unique -- you
        # can only request each state once
        states = np.unique(args.states)
        logger.info("Yanking from the following states: %s", states)

    # extract the conformations using np.random for the randomness
    confs_by_state = project.get_random_confs_from_states(
        assignments,
        states=states,
        num_confs=args.conformations_per_state,
        replacement=args.replacement)

    # save the conformations to disk, in the requested style
    save(confs_by_state=confs_by_state,
         states=states,
         style=args.style,
         format=args.format,
         outdir=args.output_dir)
Example #44
def test_FahProjectBuilder2():
    cd = os.getcwd()
    td = tempfile.mkdtemp()
    os.chdir(td)

    # check that we can build a new project:
    traj_dir = get("project_reference/project.builder/fah_style_data", just_filename=True)
    conv_traj_dir = get("project_reference/project.builder/Trajectories", just_filename=True)
    shutil.copytree(traj_dir, 'PROJXXXX')
    shutil.copytree(conv_traj_dir, 'Trajectories')
    shutil.copy2(get("project_reference/project.builder/ProjectInfo.yaml", just_filename=True), 'ProjectInfo.yaml')
    project_orig = Project.load_from('ProjectInfo.yaml')
    # made up project data

    pb = FahProjectBuilder('PROJXXXX', '.xtc', 'PROJXXXX/native.pdb', project=project_orig)
    project = pb.get_project()
    project_ref = get("project_reference/project.builder/ProjectInfo_final.yaml")

    assert project == project_ref

    os.chdir(cd)
    shutil.rmtree(td)
Example #46
    of all atoms in a given trajectory, or for all trajectories in the project. The
    output is a hdf5 file which contains the SASA for each atom in each frame
    in each trajectory (or the single trajectory you passed in.""")
    parser.add_argument('project')
    parser.add_argument('atom_indices',
                        help='Indices of atoms to calculate SASA',
                        default='all')
    parser.add_argument('output',
                        help='''hdf5 file for output. Note this will
        be THREE dimensional: ( trajectory, frame, atom ), unless you just ask for
        one trajectory, in which case it will be shape (frame, atom).''',
                        default='SASA.h5')
    parser.add_argument('traj_fn',
                        help='''Pass a trajectory file if you only
        want to calclate the SASA for a single trajectory''',
                        default='all')
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)

    SASA = run(project, atom_indices, args.traj_fn)

    io.saveh(args.output, SASA)
Example #47
import os, sys
from msmbuilder import Project
import mdtraj as md
from mdtraj import io
import numpy as np

project = Project.load_from("ProjectInfo-RRR.yaml")
Rgs = -1 * np.ones((project.n_trajs, max(project.traj_lengths)))

for i in range(project.n_trajs):
    t = project.load_traj(i)
    rg = md.compute_rg(t)
    Rgs[i][:len(rg)] = rg

io.saveh('Rgs-RRR.h5', Rgs)
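Because the trajectories differ in length, the Rgs array above is padded with -1. A hedged sketch of reading one series back without the padding:

rgs = io.loadh('Rgs-RRR.h5', 'arr_0')
rg0 = rgs[0][rgs[0] != -1]  # drop the -1 padding for trajectory 0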
Example #48
)
w = np.loadtxt(
    '/Users/tud51931/voelzlab/analysis/LifsonRoig/scripts/test_Fs_RRR_ff03/w_params.dat'
)
v = np.loadtxt(
    '/Users/tud51931/voelzlab/analysis/LifsonRoig/scripts/test_Fs_RRR_ff03/v_params.dat'
)

I = np.argsort(l)
w_max = w[I[-1]]
v_max = v[I[-1]]

#assignment = io.loadh('/Volumes/Guangfeng/Fs-peptide/Fs-ff03-owlsnest/HelixCoil/Data/Assignments.h5','arr_0')
assignment = io.loadh('results/Nv.h5', 'arr_0')
project = Project.load_from(
    '/Volumes/Guangfeng/Fs-peptide/Fs-ff03-owlsnest/HelixCoil/ProjectInfo.yaml'
)
c = Counter(assignment.reshape(1, -1)[0])
populations = np.zeros(np.max(c.keys()) + 1)


def calculate_weight_frame(w_array, v_array, w_param, v_param):
    weight = 1.0
    for i in w_array * w_param:
        if i != 0:
            weight = weight * i
    for i in v_array * v_param:
        if i != 0:
            weight = weight * i
    return weight
Example #49
def main(args, metric, logger):

    project = Project.load_from(args.project)
    if not os.path.exists(args.generators):
        raise IOError('Could not open generators')
    generators = os.path.abspath(args.generators)
    output_dir = os.path.abspath(args.output_dir)

    # connect to the workers
    try:
        json_file = client_json_file(args.profile, args.cluster_id)
        client = parallel.Client(json_file, timeout=2)
    except parallel.error.TimeoutError as exception:
        msg = '\nparallel.error.TimeoutError: ' + str(exception)
        msg += "\n\nPerhaps you didn't start a controller?\n"
        msg += "(hint, use ipcluster start)"
        print >> sys.stderr, msg
        sys.exit(1)

    lview = client.load_balanced_view()

    # partition the frames into a bunch of vtrajs
    all_vtrajs = local.partition(project, args.chunk_size)

    # initialze the containers to save to disk
    f_assignments, f_distances = local.setup_containers(
        output_dir, project, all_vtrajs)

    # get the chunks that have not been computed yet
    valid_indices = np.where(
        f_assignments.root.completed_vtrajs[:] == False)[0]
    remaining_vtrajs = np.array(all_vtrajs)[valid_indices].tolist()

    logger.info('%d/%d jobs remaining', len(remaining_vtrajs), len(all_vtrajs))

    # send the workers the files they need to get started
    # dview.apply_sync(remote.load_gens, generators, project['ConfFilename'],
    #    metric)

    # get the workers going
    n_jobs = len(remaining_vtrajs)
    amr = lview.map(remote.assign,
                    remaining_vtrajs, [generators] * n_jobs, [metric] * n_jobs,
                    chunksize=1)

    pending = set(amr.msg_ids)

    while pending:
        client.wait(pending, 1e-3)
        # finished is the set of msg_ids that are complete
        finished = pending.difference(client.outstanding)
        # update pending to exclude those that just finished
        pending = pending.difference(finished)
        for msg_id in finished:
            # we know these are done, so don't worry about blocking
            async = client.get_result(msg_id)

            try:
                assignments, distances, chunk = async.result[0]
            except RemoteError as e:
                print 'Remote Error:'
                e.print_traceback()
                raise

            vtraj_id = local.save(f_assignments, f_distances, assignments,
                                  distances, chunk)

            log_status(logger, len(pending), n_jobs, vtraj_id, async)

    f_assignments.close()
    f_distances.close()

    logger.info('All done, exiting.')
Example #50
def main(args, metric):

    if args.alg == 'sclarans' and args.stride != 1:
        logger.error(
            """You don't want to use a stride with sclarans. The whole point of
sclarans is to use a shrink multiple to accomplish the same purpose, but in parallel with
stochastic subsampling. If you cant fit all your frames into  memory at the same time, maybe you
could stride a little at the begining, but its not recommended.""")
        sys.exit(1)

    # if we have a metric that explicitly operates on a subset of indices,
    # then we provide the option to only load those indices into memory
    # WARNING: I also do something a bit dirty, and inject `None` for the
    # RMSD.atomindices to get the metric to not splice
    if isinstance(metric, metrics.RMSD):
        atom_indices = metric.atomindices
        metric.atomindices = None  # probably bad...
        logger.info('RMSD metric - loading only the atom indices required')
    else:
        atom_indices = None

    # In case the clustering / algorithm needs extra arguments, use
    # this dictionary
    extra_kwargs = {}

    # Check to be sure we won't overwrite any data
    if args.alg == 'hierarchical':
        zmatrix_fn = os.path.join(args.output_dir, 'ZMatrix.h5')
        die_if_path_exists(zmatrix_fn)
        extra_kwargs['zmatrix_fn'] = zmatrix_fn
    else:
        generators_fn = os.path.join(args.output_dir, 'Gens.h5')
        die_if_path_exists(generators_fn)
        if args.stride == 1:
            assignments_fn = os.path.join(args.output_dir, 'Assignments.h5')
            distances_fn = os.path.join(args.output_dir,
                                        'Assignments.h5.distances')
            die_if_path_exists([assignments_fn, distances_fn])

    project = Project.load_from(args.project)

    if isinstance(metric,
                  metrics.Vectorized) and not args.alg == 'hierarchical':
        # if the metric is vectorized then
        # we can load prepared trajectories
        # which may allow for better memory
        # efficiency
        ptrajs, which = load_prep_trajectories(project, args.stride,
                                               atom_indices, metric)
        trajectories = None
        n_trajs = len(ptrajs)

        num_frames = np.sum([len(p) for p in ptrajs])
        if num_frames != len(which):
            raise Exception("something went wrong in loading step (%d v %d)" %
                            (num_frames, len(which)))
    else:
        trajectories = load_trajectories(project, args.stride, atom_indices)
        ptrajs = None
        which = None
        n_trajs = len(trajectories)

    logger.info('Loaded %d trajs', n_trajs)

    clusterer = cluster(metric, trajectories, ptrajs, args, **extra_kwargs)

    if not isinstance(clusterer, clustering.Hierarchical):

        if isinstance(metric, metrics.Vectorized):
            gen_inds = clusterer.get_generator_indices()
            generators = project.load_frame(which[gen_inds, 0], which[gen_inds,
                                                                      1])
        else:
            generators = clusterer.get_generators_as_traj()

        logger.info('Saving %s', generators_fn)
        generators.save(generators_fn)

        if args.stride == 1:
            assignments = clusterer.get_assignments()
            distances = clusterer.get_distances()

            logger.info('Since stride=1, Saving %s', assignments_fn)
            logger.info('Since stride=1, Saving %s', distances_fn)
            io.saveh(assignments_fn, assignments)
            io.saveh(distances_fn, distances)