Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--n_trajs', help='number of trajectories. Default=10', type=int, default=10)
    parser.add_argument('-t', '--traj_length', help='trajectories length. Default=10000', type=int, default=10000)
    args = parser.parse_args()
    
    # these could be configured
    kT = 15.0
    dt = 0.1
    mGamma = 1000.0
    
    forcecalculator = muller.muller_force()
    

    project = Project({'ConfFilename': os.path.join(mullermsm.__path__[0], 'conf.pdb'),
              'NumTrajs': args.n_trajs,
              'ProjectRootDir': '.',
              'TrajFileBaseName': 'trj',
              'TrajFilePath': 'Trajectories',
              'TrajFileType': '.lh5',
              'TrajLengths': [args.traj_length]*args.n_trajs})
              
              
    if os.path.exists('ProjectInfo.h5'):
        print >> sys.stderr, "The file ./ProjectInfo.h5 already exists. I don't want to overwrite anything, so i'm backing off"
        sys.exit(1)
    
    
    try:
        os.mkdir('Trajectories')
    except OSError:
        print >> sys.stderr, "The directory ./Trajectores already exists. I don't want to overwrite anything, so i'm backing off"
        sys.exit(1)
        
    for i in range(args.n_trajs):
        print 'simulating traj %s' % i
        
        # select initial configs randomly from a 2D box
        initial_x = [random.uniform(-1.5, 1.2), random.uniform(-0.2, 2)]
        print 'starting conformation from randomly sampled points (%s, %s)' % (initial_x[0], initial_x[1])
        print 'propagating for %s steps on the Muller potential with a Langevin integrator...' % args.traj_length
        
        positions = muller.propagate(args.traj_length, initial_x, kT, dt, mGamma, forcecalculator)

        # positions is N x 2, but we want to make it N x 1 x 3 where the additional
        # column is just zeros. This way, being N x 1 x 3, it looks like a regular MD
        # trajectory that would be N_frames x N_atoms x 3
        positions3 = np.hstack((positions, np.zeros((len(positions),1)))).reshape((len(positions), 1, 3))
        t = Trajectory.LoadTrajectoryFile(project['ConfFilename'])
        t['XYZList'] = positions3
        
        t.SaveToLHDF(project.GetTrajFilename(i))
        print 'saving trajectory to %s' % project.GetTrajFilename(i)
        
    project.SaveToHDF('ProjectInfo.h5')
    print 'saved ProjectInfo.h5 file'

    
    pickle.dump(metric.EuclideanMetric(), open('metric.pickl', 'w'))
    print 'saved metric.pickl'
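For reference, the files written above can be read back with the same legacy MSMBuilder2 API that later examples use (compare Examples #11 and #16); a minimal sketch, assuming the script above has already run in the current directory:

import pickle

from msmbuilder import Project

# load the project file and the pickled distance metric written by Example #1
p = Project.LoadFromHDF('ProjectInfo.h5')
metric = pickle.load(open('metric.pickl'))
print 'loaded a project with %s trajectories' % p['NumTrajs']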
Example #2
def run(traj_dir, conf_filename, project_filename, iext):
    logger.info("Rebuilding project.")
    file_list = glob.glob(traj_dir + "/trj*%s" % iext)
    num_traj = len(file_list)

    traj_lengths = np.zeros(num_traj, 'int')
    traj_paths = []

    if not os.path.exists(conf_filename):
        raise(IOError("Cannot find conformation file %s" % conf_filename))

    file_list = sorted(file_list, key=utils.keynat)
    for i, filename in enumerate(file_list):
        traj_lengths[i] = len(md.open(filename))
        traj_paths.append(filename)

    records = {
        "conf_filename": conf_filename,
        "traj_lengths": traj_lengths,
        "traj_paths": traj_paths,
        "traj_errors": [None for i in xrange(num_traj)],
        "traj_converted_from": [[] for i in xrange(num_traj)]
    }

    p = Project(records)
    p.save(project_filename)
    logger.info("Wrote %s" % project_filename)
Example #4
def test_project_1():
    'ensure that the counting of errors works right'
    records = {'conf_filename': None,
               'traj_lengths': [0,0,0],
               'traj_errors': [None, 1, None],
               'traj_paths': ['t0', 't1', 't2'],
               'traj_converted_from': [None, None, None]}
    proj = Project(records, validate=False)

    eq_(proj.n_trajs, 2)
    eq_(os.path.basename(proj.traj_filename(0)), 't0')

    # since t1 should be skipped
    eq_(os.path.basename(proj.traj_filename(1)), 't2')
Example #5
    def save(self):
        "Save the trajs as a n MSMBuilder project"

        traj_dir = pjoin(self.project_dir, 'Trajectories')
        if not os.path.exists(traj_dir):
            os.makedirs(traj_dir)

        t = Trajectory.load_trajectory_file(self.conf_filename)

        traj_paths = []
        for i, xyz in enumerate(self.trajectories):
            t['IndexList'] = None  # bug in msmbuilder
            t['XYZList'] = xyz

            traj_paths.append(pjoin(traj_dir, 'trj%d.lh5' % i))
            t.save(traj_paths[-1])

        p = Project(
            {
                'conf_filename': os.path.abspath(self.conf_filename),
                'traj_lengths': self.n_frames * np.ones(self.n_trajs),
                'traj_paths': [os.path.abspath(e) for e in traj_paths],
                'traj_converted_from': [[] for i in range(self.n_trajs)],
                'traj_errors': [None for i in range(self.n_trajs)],
            },
            project_dir=self.project_dir,
            validate=True)
        p.save(pjoin(self.project_dir, 'Project.yaml'))

        # just check again
        p = Project.load_from(pjoin(self.project_dir, 'Project.yaml'))
        p._validate()
        assert np.all(
            (p.load_traj(0)['XYZList'] - self.trajectories[0])**2 < 1e-6)
Example #7
    def test_a_ConvertDataToHDF(self):
        os.chdir(WorkingDir)
        shutil.copy(PDBFn, "./")
        #def run(projectfn, PDBfn, InputDir, source, mingen, stride, rmsd_cutoff,  parallel='None'):
        ConvertDataToHDF.run(ProjectFn, PDBFn, TutorialDir + "/XTC", "file", 0,
                             1, None)
        P1 = Project.load_from(ProjectFn)

        r_P1 = Project.load_from(
            os.path.abspath(os.path.join('..', ReferenceDir, ProjectFn)))

        eq_(P1.n_trajs, r_P1.n_trajs)
        npt.assert_equal(P1.traj_lengths, r_P1.traj_lengths)
        eq_(os.path.basename(P1.traj_filename(0)),
            os.path.basename(r_P1.traj_filename(0)))
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('assignments', default='Macro4/MacroAssignments.h5', help='Path to an assignments file. (default=Macro4/MacroAssignments.h5)')
    parser.add_argument('--project', default='ProjectInfo.yaml', help='Path to ProjectInfo.yaml file. (default=ProjectInfo.yaml)')
    args = parser.parse_args()

    project = Project.load_from(args.project)
    t = reduce(operator.add, (project.load_traj(i) for i in range(project.n_trajs)))

    phi_angles = md.compute_dihedrals(t, [PHI_INDICES]) * 180.0 / np.pi
    psi_angles = md.compute_dihedrals(t, [PSI_INDICES]) * 180.0 / np.pi
    state_index = np.hstack(io.loadh(args.assignments)['arr_0'])

    for i in np.unique(state_index):
        pp.plot(phi_angles[np.where(state_index == i)],
                psi_angles[np.where(state_index == i)],
                'x', label='State %d' % i)


    pp.title("Alanine Dipeptide Macrostates")
    pp.xlabel(r"$\phi$")
    pp.ylabel(r"$\psi$")
    annotate()

    pp.legend(loc=1, labelspacing=0.075, prop={'size': 8.0}, scatterpoints=1,
              markerscale=0.5, numpoints=1)
    pp.xlim([-180, 180])
    pp.ylim([-180, 180])
    pp.show()
Example #9
def main(modeldir, genfile, type):
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    data=dict()
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd']=data['rmsd'][frames]
    com=numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'])
    data['com']=data['com'][frames]

    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)
    paths=io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))
    
    for p in range(0, 20):
        movie=project.empty_traj()
        path=paths['Paths'][p]
        flux=paths['fluxes'][p]/paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        frames=numpy.where(path!=-1)[0]
        path=numpy.array(path[frames], dtype=int)
        for (n, state) in enumerate(path):
            t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 20)
            if n==0:
                movie['XYZList']=t[0]['XYZList']
            else:
                movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
        movie.save_to_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p))
Example #10
def run(projectfn, conf_filename, input_dir, source, min_length, stride, rmsd_cutoff, atom_indices, iext):

    # check if we are doing an update or a fresh run
    # if os.path.exists(projectfn):
    #     logger.info("Found project info file encoding previous work, running in update mode...")
    #     update = True
    # else:
    #     update = False
    #
    # logger.info("Looking for %s style data in %s", source, input_dir)
    # if update:
    #     raise NotImplementedError("Ack! Update mode is not yet ready yet.")

    # if the source is fah, we'll use some special FaH specific loading functions
    # to (1) try to recover in case of errors and (2) load the specific directory
    # hierarchy of FaH (RUN/CLONE/GEN/frame.xtc)
    if os.path.exists(projectfn):
        project = Project.load_from(projectfn)
        logger.warn(
            "%s exists, will modify it and update the trajectories in %s",
            projectfn, '/'.join(project._traj_paths[0].split('/')[:-1]))
    else:
        project = None

    if source.startswith('file'):
        pb = ProjectBuilder(
            input_dir, input_traj_ext=iext, conf_filename=conf_filename,
            stride=stride, project=project, atom_indices=atom_indices)
    elif source == 'fah':
        pb = FahProjectBuilder(
            input_dir, input_traj_ext=iext, conf_filename=conf_filename,
            stride=stride, project=project, atom_indices=atom_indices)
    else:
        raise ValueError("Invalid argument for source: %s" % source)

    # check that trajectories do not go farther than a certain RMSD
    # from the PDB. Useful to check for blowing up or other numerical
    # instabilities
    if rmsd_cutoff is not None:
        # TODO: this is going to use ALL of the atom_indices, including hydrogen. This is
        # probably not the desired functionality
        # KAB: Apparently needed to use correctly subsetted atom_indices here to avoid an error
        validator = validators.RMSDExplosionValidator(
            conf_filename, max_rmsd=rmsd_cutoff, atom_indices=atom_indices)
        pb.add_validator(validator)

    # Only accept trajectories with more snapshots than min_length.
    if min_length > 0:
        validator = validators.MinLengthValidator(min_length)
        pb.add_validator(validator)

    # everyone wants to be centered
    pb.add_validator(validators.TrajCenterer())

    pb.get_project().save(projectfn)
    assert os.path.exists(projectfn), '%s does not exist' % projectfn
    logger.info("Finished data conversion successfully.")
    logger.info("Generated: %s, Trajectories/", projectfn)

    return
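A hypothetical call matching the signature above, loosely modeled on the ConvertDataToHDF.run() invocation in Example #7 (all argument values are illustrative):

run('ProjectInfo.yaml', 'native.pdb', './XTC', 'file',
    min_length=0, stride=1, rmsd_cutoff=None,
    atom_indices=None, iext='.xtc')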
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--project', default='ProjectInfo.h5')
    parser.add_argument('-a', '--assignments', default='Data/Assignments.h5')
    args = parser.parse_args()

    a = Serializer.LoadData(args.assignments)
    p = Project.LoadFromHDF(args.project)
    maxx, maxy, minx, miny = -np.inf, -np.inf, np.inf, np.inf
    n_states = np.max(a) + 1

    x = np.concatenate([p.LoadTraj(i)['XYZList'][:, 0, 0] for i in range(p['NumTrajs'])])
    y = np.concatenate([p.LoadTraj(i)['XYZList'][:, 0, 1] for i in range(p['NumTrajs'])])
    a = np.concatenate([a[i, :] for i in range(p['NumTrajs'])])
    
    plot_v(minx=np.min(x), maxx=np.max(x), miny=np.min(y), maxy=np.max(y))
    colors = ['b', 'r', 'm', 'c', 'g']
    for j in xrange(n_states):
        w = np.where(a == j)[0]    
        pp.scatter(x[w], y[w], marker='x', c=colors[j], label='State %d' % j,
                   edgecolor=colors[j], alpha=0.5)

    
    pp.legend()
    pp.show()
Example #12
def main():
    """Parse command line inputs, load up files, and build a movie."""

    parser = arglib.ArgumentParser(description="""
Create an MSM movie by sampling a sequence of states and sampling a 
random conformation from each state in the sequence.  
""")
    parser.add_argument('project')
    parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
    parser.add_argument('tprob', default='Data/tProb.mtx')
    parser.add_argument('num_steps')
    parser.add_argument('starting_state', type=int, help='''Which state to start trajectory from.''')
    parser.add_argument('output', default='sample_traj.pdb', help="""The filename of your output trajectory.  The filetype suffix will be used to select the output file format.""")
    args = parser.parse_args()

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
    
    num_steps = int(args.num_steps)
    starting_state = int(args.starting_state)
    
    project = Project.load_from(args.project)    
    T = scipy.io.mmread(args.tprob).tocsr()

    state_traj = msm_analysis.sample(T, starting_state, num_steps)
    sampled_traj = project.get_random_confs_from_states(assignments, state_traj, 1)
    traj = sampled_traj[0]
    traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj])
    traj.save(args.output)
Example #13
def main():
    """Parse command line inputs, load up files, then call run() and save() to do
    the real work"""
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    # load...
    # project
    project = Project.load_from(args.project)

    # assignments
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    # states
    if -1 in args.states:
        states = np.unique(assignments[np.where(assignments != -1)])
        logger.info('Yanking from all %d states', len(states))
    else:
        # ensure that the states are sorted, and that they're unique -- you
        # can only request each state once
        states = np.unique(args.states)
        logger.info("Yanking from the following states: %s", states)

    # extract the conformations using np.random for the randomness
    confs_by_state = project.get_random_confs_from_states(
        assignments, states=states, num_confs=args.conformations_per_state,
        replacement=args.replacement)

    # save the conformations to disk, in the requested style
    save(confs_by_state=confs_by_state, states=states, style=args.style,
         format=args.format, outdir=args.output_dir)
Example #14
def main(file):
    ass=io.loadh(file)
    dir=os.path.dirname(file)
    base=os.path.basename(file)
    newdir='%s/subsample' % dir
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    p=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    data=dict()
    totals=dict()
    iterations=int(ass['arr_0'].shape[1]/10.0)
    start=max(p.traj_lengths)
    for iter in range(0, iterations):
        new=start-10
        if new < 10:
            break
        totals[new]=0
        data[new]=-numpy.ones((ass['arr_0'].shape[0], new), dtype=int)
        for i in range(0, ass['arr_0'].shape[0]):
            data[new][i]=ass['arr_0'][i][:new]
            frames=numpy.where(data[new][i]!=-1)[0]
            totals[new]+=len(frames)
        start=new

    ohandle=open('%s/times.h5' % (newdir), 'w')
    for key in sorted(data.keys()):
        print data[key].shape
        print "total time is %s" % totals[key]
        ohandle.write('%s\t%s\t%s\n' % (data[key].shape[0], data[key].shape[1], totals[key]))
Example #15
def main():
    global data2d
    global As
    # First I need to turn the assignments matrix into a 1D list of assignments
    sys.stdout = os.fdopen(sys.stdout.fileno(),'w',0)
    print "Reading in Assignments... from %s " % options.assFN
    As = io.loadh(options.assFN)['arr_0'].astype(int)
    print "Reading in data... from %s " % options.dataFN
    try: 
        f = io.loadh( options.dataFN )
        try:
            data2d = f['arr_0']
        except:
            data2d = f['Data']
    except:
        data = load(options.dataFN)
        proj = Project.load_from( options.projFN )
        data2d = msmTools.reshapeRawData( data, proj )

    print "Calculating averages for:"

    pool = mp.Pool(options.procs)
    clusters = range( As.max() + 1)
    result = pool.map_async(calcAvg,clusters[:])
    result.wait()
    sol = result.get()
    sol = array(sol)
    savetxt(options.outFN, sol)

    return
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--generators', default='Data/Gens.lh5', help='Path to Gens.lh5')
    parser.add_argument('-p', '--project', default='ProjectInfo.h5', help='Path to ProjectInfo.h5')
    parser.add_argument('-s', '--stride', default=5, type=int, help='Stride to plot the data at')
    args = parser.parse_args()
    
    
    gens = Trajectory.LoadTrajectoryFile(args.generators)
    gens_x = gens['XYZList'][:,0,0]
    gens_y =  gens['XYZList'][:,0,1]
    points = np.array([gens_x, gens_y]).transpose()
    
    
    
    tri = Delaunay(points)

    PL = []
    for p in points:
        PL.append(Voronoi.Site(x=p[0],y=p[1]))

    v,eqn,edges,wtf = Voronoi.computeVoronoiDiagram(PL)

    edge_points=[]
    for (l,x1,x2) in edges:
        if x1>=0 and x2>=0:
            edge_points.append((v[x1],v[x2]))

    lines = LineCollection(edge_points, linewidths=0.5, color='k')
    
    fig = pp.figure()
    ax = fig.add_subplot(111)
    
    fig.gca().add_collection(lines)

    maxx, minx= np.max(gens_x), np.min(gens_x)
    maxy, miny = np.max(gens_y), np.min(gens_y)
    # plot the background
    plot_v(minx=minx, maxx=maxx, miny=miny, maxy=maxy, ax=ax)
    pp.xlim(minx, maxx)
    pp.ylim(miny, maxy)

    # plot a single trajectory
    p = Project.LoadFromHDF(args.project)
    t = p.LoadTraj(0)
    x = t['XYZList'][:,0,0][::args.stride]
    y = t['XYZList'][:,0,1][::args.stride]
    cm = pp.get_cmap('spectral')

    n_points = len(x)
    ax.set_color_cycle([cm(1.*i/(n_points-1)) for i in range(n_points-1)])
    for i in range(n_points-1):
        ax.plot(x[i:i+2],y[i:i+2])

    pp.title('Voronoi Microstate Decomposition, with first trajectory')
    


    pp.show()
Example #17
def main(modeldir, genfile, type, write=False):
    proj=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    data=dict()
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd']=data['rmsd'][frames]
    com=numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'][frames])

    residues=['F36', 'H87', 'I56', 'I90', 'W59', 'Y82', 'hydrophob_dist', 'oxos_dist']
    loops=['loop1', 'loop2', 'loop3']
    for loop in loops:
        data[loop]=numpy.loadtxt('%s.%srmsd.dat' % (genfile.split('.lh5')[0], loop))
        data[loop]=data[loop][frames]
    for res in residues:
        file='%s_%spair.dat' % (genfile.split('.lh5')[0], res)
        if os.path.exists(file):
            data[res]=numpy.loadtxt(file)
            data[res]=data[res][frames]
    angles=['phi', 'omega']
    for ang in angles:
        file='%s_%s.dat' % (genfile.split('.lh5')[0], ang)
        if os.path.exists(file):
            data[ang]=numpy.loadtxt(file)
            data[ang]=data[ang][frames]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound=numpy.loadtxt('%s/tpt-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)

    Tdense=T.todense()
    Tdata=dict()
    for i in unbound:
        for j in unbound:
            if Tdense[i,j]!=0:
                if i not in Tdata.keys():
                    Tdata[i]=[]
                Tdata[i].append(j)
    #print Tdata
    cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive
    Q=tpt.calculate_committors(unbound, bound, T)
    ohandle=open('%s/commitor_states.txt' % modeldir, 'w')
    for i in range(0,len(Q)):
        if Q[i]>0.40 and Q[i]<0.6:
            ohandle.write('%s\n' % i)
            #t=project.get_random_confs_from_states(ass['arr_0'], [int(i),], 20)
            #t[0].save_to_xtc('%s/commottor_state%s.xtc' % (modeldir, i))
    if write==True:
        for op in sorted(data.keys()):
            pylab.figure()
            pylab.scatter(data['com'], data[op],  c=Q, cmap=cm, alpha=0.7, s=[map_size(i) for i in Q])
            pylab.xlabel('L RMSD')
            pylab.ylabel(op)
            pylab.colorbar()
        pylab.show()
Example #18
def main(assfile, lag, nproc):
    lag=int(lag)
    nproc=int(nproc)
    Assignments=io.loadh(assfile)
    num=int(assfile.split('Assignments_sub')[1].split('.h5')[0])
    dir=os.path.dirname(assfile)
    newdir='%s/boot-sub%s' % (dir, num)
    ref_sub=numpy.loadtxt('%s/times.h5' % dir, usecols=(1,))
    ref_total=numpy.loadtxt('%s/times.h5' % dir, usecols=(2,))
    times=dict()
    for (i,j) in zip(ref_sub, ref_total):
        times[i]=j

    proj=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom=int(times[num])
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    if 'Data' in Assignments.keys():
        Assignments=Assignments['Data']
    else:
        Assignments=Assignments['arr_0']
    print Assignments.shape
    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag), sliding_window=True)
    Counts=Counts.todense()
    Counts=Counts*(1.0/lag)
    T=numpy.array(Counts)
    frames=numpy.where(T==0)
    T[frames]=1
    Popsample=dict()
    iteration=0
    total_iteration=100/nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain=100 % nproc
    else:
        remain=False
    print "iterating thru tCount samples"
    count=0
    while iteration < 100:
        if count*nproc > 100:
            nproc=remain
        print "sampling iteration %s" % iteration
        Tfresh=T.copy()
        input = zip([Tfresh]*nproc, [multinom]*nproc, range(0, NumStates))
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        pool.terminate()
        for c_matrix in all:
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), c_matrix)
            #rev_counts, t_matrix, Populations, Mapping=x
            #scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
            #numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
            #numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration+=1
        count+=1
        print "dont with iteration %s" % iteration*nproc
Example #19
    def setup(self):
        self.metric = metrics.Dihedral()
        self.pdb_fn = os.path.join(fixtures_dir(), 'native.pdb')
        self.trj_fn = os.path.join(fixtures_dir(), 'trj0.lh5')
        self.project = Project({'traj_lengths': [501], 'traj_paths': [self.trj_fn],
                                'conf_filename': self.pdb_fn, 'traj_converted_from': [None],
                                'traj_errors': [None]})
        self.vtraj = partition(self.project, chunk_size=501)[0]
Example #20
def test_project_2():
    'inconsistent lengths should be detected'
    records = {'conf_filename': None,
               'traj_lengths': [0,0], # this is one too short
               'traj_errors': [None, None, None],
               'traj_paths': ['t0', 't1', 't2'],
               'traj_converted_from': [None, None, None]}
    proj = Project(records, validate=False)
Example #22
def test_rg_1():
    project = Project.load_from(os.path.join(fixtures_dir(), 'ProjectInfo.h5'))
    traj = project.load_traj(0)
    xyzlist = traj['XYZList']

    a = rgcalc.calculate_rg(xyzlist)
    b = reference_rg(xyzlist)
    
    npt.assert_array_almost_equal(a, b)
Example #25
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")
    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)
    
    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path, distances_path)

    logger.info('All Done!')
Example #26
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--project', default='ProjectInfo.h5')
    parser.add_argument(
        '-t',
        '--trajectories',
        nargs='+',
        help='''Supply either the path to a trajectory file (i.e. Data/Gens.lh5),
         or an integer, which will be interpreted as a trajectory index
         into the trajectories that accompany the project. default: plot all
         of the trajectories''',
        default=['-1'])
    args = parser.parse_args()

    p = Project.LoadFromHDF(args.project)

    # record the bounding box of the points so that we know
    # what to render for the background
    maxx, minx, maxy, miny = 1.2, -1.5, 2, -0.2

    # if -1 is included, add in ALL of the trajectories
    if '-1' in args.trajectories:
        args.trajectories.remove('-1')
        args.trajectories.extend(range(p['NumTrajs']))
    # remove duplicates
    args.trajectories = set(args.trajectories)

    for requested in args.trajectories:
        if os.path.exists(str(requested)):
            traj = Trajectory.LoadTrajectoryFile(str(requested))
            print 'plotting %s' % requested
            markersize = 50
        else:
            try:
                i = int(requested)
                traj = p.LoadTraj(i)
                print 'plotting %s' % i
                markersize = 5
            except ValueError:
                print >> sys.stderr, "I couldn't figure out how to deal with the argument %s" % requested
                continue
            except IOError as e:
                print >> sys.stderr, str(e)
                continue

        xyz = traj['XYZList']
        x = xyz[:, 0, 0]
        y = xyz[:, 0, 1]

        maxx, maxy = max(np.max(x), maxx), max(np.max(y), maxy)
        minx, miny = min(np.min(x), minx), min(np.min(y), miny)
        pp.plot(x, y, '.', markersize=markersize, alpha=0.5)

    plot_v(minx=minx, maxx=maxx, miny=miny, maxy=maxy)
    pp.show()
Example #27
def main(assfile, lag, nproc):
    lag=int(lag)
    nproc=int(nproc)
    Assignments=io.loadh(assfile)
    dir=os.path.dirname(assfile)
    newdir='%s/sample-counts' % dir
    proj=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom=sum(proj.traj_lengths)
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    if 'Data' in Assignments.keys():
        Assignments=Assignments['Data']
    else:
        Assignments=Assignments['arr_0']
    print Assignments.shape
    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag), sliding_window=True)
    Counts=Counts.todense()
    Counts=Counts*(1.0/lag)
    T=numpy.array(Counts)
    frames=numpy.where(T==0)
    T[frames]=1
    Popsample=dict()
    iteration=0
    total_iteration=100/nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain=100 % nproc
    else:
        remain=False
    print "iterating thru tCount samples"
    count=0
    while iteration < 100:
        if count*nproc > 100:
            nproc=remain
        print "sampling iteration %s" % iteration
        Tfresh=T.copy()
        counts=range(0, nproc)
        input = zip([Tfresh]*nproc, [multinom]*nproc, [NumStates]*nproc, counts)
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        print "computed resampled matrices"
        pool.terminate()
        for count_matrix in all:
            #rev_counts, t_matrix, Populations, Mapping=x
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), count_matrix)
           # scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
           # numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
           # numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration+=1
        count+=1
        print "dont with iteration %s" % iteration*nproc
Example #28
def load_trajectories(projectfn, stride):
    project = Project.load_from(projectfn)

    list_of_trajs = []
    for i in xrange(project.n_trajs):
        # note, LoadTraj is only using the fast strided loading for
        # HDF5 formatted trajs
        traj = project.load_traj(i, stride=stride)
        list_of_trajs.append(traj)

    return list_of_trajs
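A sketch of how this helper might be called (the project filename and stride value are illustrative):

trajs = load_trajectories('ProjectInfo.yaml', stride=5)
print 'loaded %d strided trajectories' % len(trajs)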
Example #31
    def test_g_GetRandomConfs(self):
        P1 = Project.load_from(ProjectFn)
        Assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

        # make a predictable stream of random numbers by seeding the RNG with 42
        random_source = np.random.RandomState(42)
        randomconfs = GetRandomConfs.run(P1, Assignments,
                                         NumRandomConformations, random_source)

        reference = Trajectory.load_trajectory_file(
            os.path.join(ReferenceDir, "2RandomConfs.lh5"))
        self.assert_trajectories_equal(reference, randomconfs)
Example #32
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    SASA = run(project, atom_indices, args.traj_fn)
    io.saveh(args.output, SASA)
Example #33
def main(modeldir, start, type):
    start=int(start)
    data=dict()
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    files=glob.glob('%s/fkbp*xtal.pdb' % modeldir.split('Data')[0])
    pdb=files[0]
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T=mmread('%s/tProb.mtx' % modeldir)
    startstate=unbound[start]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    steps=100000
    print "on start state %s" % startstate
    if os.path.exists('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)):
        print "loading from states"
        traj=numpy.loadtxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate))
    else:
        traj=msm_analysis.sample(T, int(startstate),int(steps))
        numpy.savetxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate), traj)
    print "checking for chkpt file"
    checkfile=glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if len(checkfile) > 0:
        movie=Trajectory.load_from_xtc(checkfile[0], PDBFilename=pdb)
        n=int(checkfile[0].split('xtc.state')[1].split('chkpt')[0])
        os.system('mv %s %s.chkpt.cp' % (checkfile[0], checkfile[0].split('.xtc')[0]))
        print "checkpointing at state index %s out of %s" % (n, len(traj))
        checkfile=checkfile[0]
        restart=True
    else:
        restart=False
        n=0
        movie=project.empty_traj()
    while n < len(traj):
        print "on state %s" % n
        state=int(traj[n])
        t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 10)
        if n==0:
            movie['XYZList']=t[0]['XYZList']
            n+=1
            continue
        elif n % 100==0:
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            if restart==True:
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n))
            checkfile='%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            n+=1
            continue
        elif n!=0:
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            n+=1
            continue
    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
Example #34
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    # arglib.die_if_path_exists(args.output_dir)
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    project = Project.load_from(args.project)
    gens = md.load(args.generators)

    if isinstance(metric, metrics.RMSD):
        # this is really bad design, and we're going to fix it soon in
        # MSMBuilder3, but here's the deal. When Cluster.py loads up the
        # trajectories (Cluster.py:load_trajectories()), it only loads the
        # required indices for RMSD. This means that when it saves the Gens
        # file, that file contains only a subset of the atoms. So when
        # we run *this* script, we need to perform a restricted load of
        # the trajectories on disk, but we need to NOT perform a restricted
        # load of the gens.h5 file. (By restricted load, I mean loading
        # only a subset of the data in the file)
        if gens.n_atoms != len(metric.atomindices):
            msg = ('Using RMSD clustering/assignment, this script expects '
                   'that the Cluster.py script saves a generators file that '
                   'only contains the indices of the atoms of interest, and '
                   'not any of the superfluous degrees of freedom that were '
                   'not used for clustering. But you supplied %d cluster '
                   'centers each containing %d atoms. Your atom indices file '
                   'on the other hand contains %d atoms') \
                % (gens.xyz.shape[0], gens.xyz.shape[1],
                   len(metric.atomindices))
            raise ValueError(msg)

        # now that we're telling the assign function only to load up a
        # subset of the atoms, and the generator is already only a subset,
        # the actual RMSD object needs to, from ITS perspective, operate on
        # every degree of freedom. So it shouldn't be aware of any special
        # atom_indices
        atom_indices = metric.atomindices
        metric.atomindices = None
        # this runs assignment and prints them to disk
        assign_with_checkpoint(metric,
                               project,
                               gens,
                               assignments_path,
                               distances_path,
                               atom_indices_to_load=atom_indices)
    else:
        assign_with_checkpoint(metric, project, gens, assignments_path,
                               distances_path)

    logger.info('All Done!')
Example #35
    def setup(self):
        self.metric = metrics.Dihedral()
        self.pdb_fn = os.path.join(fixtures_dir(), 'native.pdb')
        self.trj_fn = os.path.join(fixtures_dir(), 'trj0.lh5')
        self.project = Project({
            'NumTrajs': 1,
            'TrajLengths': [501],
            'TrajFileBaseName': 'trj',
            'TrajFileType': '.lh5',
            'ConfFilename': self.pdb_fn,
            'TrajFilePath': fixtures_dir()
        })
        self.vtraj = partition(self.project, chunk_size=501)[0]
Example #36
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    #arglib.die_if_path_exists(args.output_dir)
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)
    
    if isinstance(metric, metrics.RMSD):
        # this is really bad design, and we're going to fix it soon in
        # MSMBuilder3, but here's the deal. When Cluster.py loads up the
        # trajectories (Cluster.py:load_trajectories()), it only loads the
        # required indices for RMSD. This means that when it saves the Gens
        # file, that file contains only a subset of the atoms. So when
        # we run *this* script, we need to perform a restricted load of
        # the trajectories on disk, but we need to NOT perform a restricted
        # load of the gens.lh5 file. (By restricted load, I mean loading
        # only a subset of the data in the file)
        if gens['XYZList'].shape[1] != len(metric.atomindices):
            msg = ('Using RMSD clustering/assignment, this script expects '
                   'that the Cluster.py script saves a generators file that '
                   'only contains the indices of the atoms of interest, and '
                   'not any of the superfluous degrees of freedom that were '
                   'not used for clustering. But you supplied %d cluster '
                   'centers each containing %d atoms. Your atom indices file '
                   'on the other hand contains %d atoms') \
                    % (gens['XYZList'].shape[0], gens['XYZList'].shape[1],
                       len(metric.atomindices))
            raise ValueError(msg)


        # now that we're telling the assign function only to load up a
        # subset of the atoms, and the generator is already only a subset,
        # the actual RMSD object needs to, from ITS perspective, operate on
        # every degree of freedom. So it shouldn't be aware of any special
        # atom_indices
        atom_indices = metric.atomindices
        metric.atomindices = None
        # this runs assignment and prints them to disk
        assign_with_checkpoint(metric, project, gens, assignments_path,
            distances_path, atom_indices_to_load=atom_indices)
    else:
        assign_with_checkpoint(metric, project, gens, assignments_path,
            distances_path)

    logger.info('All Done!')
Example #37
def load_trajectories(projectfn, stride, atom_indices):
    project = Project.load_from(projectfn)

    list_of_trajs = []
    for i in xrange(project.n_trajs):
        # note, LoadTraj is only using the fast strided loading for
        # HDF5 formatted trajs
        traj = project.load_traj(i, stride=stride, atom_indices=atom_indices)
        
        if atom_indices is not None:
            assert len(atom_indices) == traj['XYZList'].shape[1]
        
        list_of_trajs.append(traj)

    return list_of_trajs
Example #38
def entry_point():
    args = parser.parse_args()
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance')
        sys.exit(1)

    project = Project.load_from(args.project)
    assignments = main(
        k, d, args.hierarchical_clustering_zmatrix, args.stride, project)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #39
def run(traj_dir, conf_filename, project_filename):
    logger.info("Rebuilding project.")
    file_list = glob.glob(traj_dir + "/trj*.lh5")
    num_traj = len(file_list)
    
    traj_lengths = np.zeros(num_traj,'int')
    traj_paths = []
    
    file_list = sorted(file_list, key=utils.keynat)
    for i,filename in enumerate(file_list):
        traj_lengths[i] = Trajectory.load_trajectory_file(filename,JustInspect=True)[0]
        traj_paths.append(filename)    
    
    records = {
    "conf_filename":conf_filename,
    "traj_lengths":traj_lengths,
    "traj_paths":traj_paths,
    "traj_errors": [None for i in xrange(num_traj)],
    "traj_converted_from":[[] for i in xrange(num_traj)]           
    }
    
    p = Project(records)
    p.save(project_filename)
    logger.info("Wrote %s" % project_filename)
Example #40
def entry_point():
    args, metric = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = md.load(args.pdb)
    if args.traj_fn.lower() == 'all':
        traj_fn = None
    else:
        traj_fn = args.traj_fn

    distances = run(project, pdb, metric, traj_fn)

    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
Example #43
def entry_point():
    args, prep_metric = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    min_length = int(float(args.min_length))
    # need to convert to float first because int can't
    # convert a string that is '1E3' for example...weird.

    tica_obj = run(
        prep_metric, project, args.delta_time, atom_indices=atom_indices,
        output=args.output, min_length=min_length, stride=args.stride)
Example #44
def load(filename):
    # delay these imports, since this module is loaded in a bunch
    # of places but not necessarily used
    import scipy.io
    from msmbuilder import Project

    # the filename extension
    ext = os.path.splitext(filename)[1]

    # load trajectories
    if ext != '.h5' and ext in md._FormatRegistry.loaders.keys():
        val = md.load(filename)

    # load flat text files
    elif 'AtomIndices.dat' in filename:
        # try loading AtomIndices first, because the default for loadtxt
        # is to use floats
        val = np.loadtxt(filename, dtype=np.int)
    elif ext in ['.dat']:
        # try loading general .dats with floats
        val = np.loadtxt(filename)

    # short circuit opening ProjectInfo
    elif ('ProjectInfo.yaml'
          in filename) or ('ProjectInfo.h5' in filename) or (re.search(
              'ProjectInfo.*\.yaml', filename)):
        val = Project.load_from(filename)

    # load with serializer files that end with .h5, .hdf or .h5.distances
    elif ext in ['.h5', '.hdf']:
        val = io.loadh(filename, deferred=False)
    elif filename.endswith('.h5.distances'):
        val = io.loadh(filename, deferred=False)

    # load matricies
    elif ext in ['.mtx']:
        val = scipy.io.mmread(filename)

    else:
        raise TypeError(
            "I could not infer how to load this file. You "
            "can either request load=False, or perhaps add more logic to "
            "the load heuristics in this class: %s" % filename)

    return val
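A few illustrative calls, one per branch of the heuristic above (the paths are hypothetical):

project = load('ProjectInfo.yaml')   # -> msmbuilder Project
indices = load('AtomIndices.dat')    # -> integer array
tprob = load('Data/tProb.mtx')       # -> scipy sparse matrix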
Example #45
def main(modeldir):
    proj=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    data=dict()
    data['dist']=numpy.loadtxt('%s/prot_lig_distance.dat' % modeldir, usecols=(1,))
    data['rmsd']=numpy.loadtxt('%s/Gens.rmsd.dat' % modeldir, usecols=(2,))
    com=numpy.loadtxt('%s/Gens.vmd_com.dat' % modeldir, usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'])
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    pylab.scatter(data['com'][frames], data['rmsd'][frames])
    pylab.scatter([refcom,], [0,], c='k', marker='x', s=100)
    pylab.xlabel('P-L COM')
    pylab.ylabel('P-L RMSD')
    pylab.show()
Example #46
def main(modeldir):
    data=dict()
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)

    if not os.path.exists('%s/adaptive-states/' % modeldir):
        os.mkdir('%s/adaptive-states/' % modeldir)
    for state in sorted(set(ass['arr_0'].flatten())):
        if state!=-1:
            t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 5)
            for i in range(0, 5):
                print state, i
                (a, b, c) =t[0]['XYZList'].shape
                movie=project.empty_traj()
                movie['XYZList']=numpy.zeros((1, b, c), dtype=numpy.float32)
                movie['XYZList'][0]=t[0]['XYZList'][i]
                movie.save_to_pdb('%s/adaptive-states/state%s-%s.pdb' % (modeldir, int(state), i))
Example #47
def main():
    parser = arglib.ArgumentParser(
        description="""
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use 
AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to a 
microstate in a rectangular array of ints. Assignments.h5.distances is an 
array of real numbers of the same dimension containing the distance (according 
to whichever metric you choose) from each frame to the medoid of the 
microstate it is assigned to.""",
        get_metric=True
    )  #, formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('project')
    parser.add_argument(dest='generators',
                        help='''Output trajectory file containing
        the structures of each of the cluster centers. Note that for hierarchical clustering
        methods, this file will not be produced.''',
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')

    args, metric = parser.parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")
    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path)

    logger.info('All Done!')
Example #48
def plot_raw_trajectory(i):
    from rmagic import r
    p = Project.load_from('project/Project.yaml')
    traj = p.load_traj(i)['XYZList']

    r.push(x=traj[:, 0], y=traj[:, 1], ts=np.arange(p.traj_lengths[i]))
    r.push(bounds=[-5, 5])
    r.eval('''
    library(ggplot2)
    p = ggplot(data=data.frame(x=x, y=y, ts=ts), aes(x=x, y=y, color=ts))
    p = p + geom_path()
    #p = p + geom_point()
    p = p + scale_x_continuous(limits=bounds)
    p = p + scale_y_continuous(limits=bounds)
    p = p + scale_color_continuous(low='black', high='lightblue')
    p = p + ggtitle('One of the trajectories')
    ggsave('plot.png')
    system('open plot.png')
    ''')
Example #51
def entry_point():
    """Parse command line inputs, load up files, and build a movie."""
    args = parser.parse_args()
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    num_steps = int(args.num_steps)
    starting_state = int(args.starting_state)

    project = Project.load_from(args.project)
    T = scipy.io.mmread(args.tprob).tocsr()

    state_traj = msm_analysis.sample(T, starting_state, num_steps)
    sampled_traj = project.get_random_confs_from_states(
        assignments, state_traj, 1)
    traj = sampled_traj[0]
    traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj])
    traj.save(args.output)
Example #53
def entry_point():
    """Parse command line inputs, load up files, then call run() and save() to do
    the real work"""
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    # load...
    # project
    project = Project.load_from(args.project)

    # assignments
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    # states
    if -1 in args.states:
        states = np.unique(assignments[np.where(assignments != -1)])
        logger.info('Yanking from all %d states', len(states))
    else:
        # ensure that the states are sorted, and that they're unique -- you
        # can only request each state once
        states = np.unique(args.states)
        logger.info("Yanking from the following states: %s", states)

    # extract the conformations using np.random for the randomness
    confs_by_state = project.get_random_confs_from_states(
        assignments,
        states=states,
        num_confs=args.conformations_per_state,
        replacement=args.replacement)

    # save the conformations to disk, in the requested style
    save(confs_by_state=confs_by_state,
         states=states,
         style=args.style,
         format=args.format,
         outdir=args.output_dir)
Example #54
def entry_point():
    parser = arglib.ArgumentParser(description="""Calculate the solvent accessible surface area (SASA)
    of all atoms in a given trajectory, or for all trajectories in the project. The
    output is an hdf5 file which contains the SASA for each atom in each frame
    in each trajectory (or the single trajectory you passed in).""")
    parser.add_argument('project')
    parser.add_argument('atom_indices',
                        help='Indices of atoms to calculate SASA',
                        default='all')
    parser.add_argument('output',
                        help='''hdf5 file for output. Note this will
        be THREE dimensional: ( trajectory, frame, atom ), unless you just ask for
        one trajectory, in which case it will be shape (frame, atom).''',
                        default='SASA.h5')
    parser.add_argument('traj_fn',
                        help='''Pass a trajectory file if you only
        want to calclate the SASA for a single trajectory''',
                        default='all')
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)

    SASA = run(project, atom_indices, args.traj_fn)

    io.saveh(args.output, SASA)
Example #55
def main(args, metric, logger):

    project = Project.load_from_hdf(args.project)
    if not os.path.exists(args.generators):
        raise IOError('Could not open generators')
    generators = os.path.abspath(args.generators)
    output_dir = os.path.abspath(args.output_dir)

    # connect to the workers
    try:
        json_file = client_json_file(args.profile, args.cluster_id)
        client = parallel.Client(json_file, timeout=2)
    except parallel.error.TimeoutError as exception:
        msg = '\nparallel.error.TimeoutError: ' + str(exception)
        msg += "\n\nPerhaps you didn't start a controller?\n"
        msg += "(hint, use ipcluster start)"
        print >> sys.stderr, msg
        sys.exit(1)

    lview = client.load_balanced_view()

    # partition the frames into a bunch of vtrajs
    all_vtrajs = local.partition(project, args.chunk_size)

    # initialize the containers to save to disk
    f_assignments, f_distances = local.setup_containers(
        output_dir, project, all_vtrajs)

    # get the chunks that have not been computed yet
    valid_indices = np.where(
        f_assignments.root.completed_vtrajs[:] == False)[0]
    remaining_vtrajs = np.array(all_vtrajs)[valid_indices].tolist()

    logger.info('%d/%d jobs remaining', len(remaining_vtrajs), len(all_vtrajs))

    # send the workers the files they need to get started
    # dview.apply_sync(remote.load_gens, generators, project['ConfFilename'],
    #    metric)

    # get the workers going
    n_jobs = len(remaining_vtrajs)
    amr = lview.map(remote.assign,
                    remaining_vtrajs, [generators] * n_jobs, [metric] * n_jobs,
                    chunksize=1)

    pending = set(amr.msg_ids)

    while pending:
        client.wait(pending, 1e-3)
        # finished is the set of msg_ids that are complete
        finished = pending.difference(client.outstanding)
        # update pending to exclude those that just finished
        pending = pending.difference(finished)
        for msg_id in finished:
            # we know these are done, so don't worry about blocking
            async = client.get_result(msg_id)

            assignments, distances, chunk = async.result[0]
            vtraj_id = local.save(f_assignments, f_distances, assignments,
                                  distances, chunk)

            log_status(logger, len(pending), n_jobs, vtraj_id, async)

    f_assignments.close()
    f_distances.close()

    logger.info('All done, exiting.')
Example #56
)
w = np.loadtxt(
    '/Users/tud51931/voelzlab/analysis/LifsonRoig/scripts/test_Fs_RRR_ff03/w_params.dat'
)
v = np.loadtxt(
    '/Users/tud51931/voelzlab/analysis/LifsonRoig/scripts/test_Fs_RRR_ff03/v_params.dat'
)

I = np.argsort(l)
w_max = w[I[-1]]
v_max = v[I[-1]]

#assignment = io.loadh('/Volumes/Guangfeng/Fs-peptide/Fs-ff03-owlsnest/HelixCoil/Data/Assignments.h5','arr_0')
assignment = io.loadh('results/Nv.h5', 'arr_0')
project = Project.load_from(
    '/Volumes/Guangfeng/Fs-peptide/Fs-ff03-owlsnest/HelixCoil/ProjectInfo.yaml'
)
c = Counter(assignment.reshape(1, -1)[0])
populations = np.zeros(np.max(c.keys()) + 1)


def calculate_weight_frame(w_array, v_array, w_param, v_param):
    weight = 1.0
    for i in w_array * w_param:
        if i != 0:
            weight = weight * i
    for i in v_array * v_param:
        if i != 0:
            weight = weight * i
    return weight
Example #57
import os, sys
from msmbuilder import Project
import mdtraj as md
from mdtraj import io
import numpy as np

project = Project.load_from("ProjectInfo-RRR.yaml")
Rgs = -1 * np.ones((project.n_trajs, max(project.traj_lengths)))

for i in range(project.n_trajs):
    t = project.load_traj(i)
    rg = md.compute_rg(t)
    Rgs[i][:len(rg)] = rg

io.saveh('Rgs-RRR.h5', Rgs)
Example #58
                        default='Data/Gens.lh5')

    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    if -1 in args.states:
        print "Ripping PDBs for all states"
        args.states = 'all'

    if args.conformations_per_state == -1:
        print "Getting all PDBs for each state"
        args.conformations_per_state = 'all'

    atom_indices = np.loadtxt(args.lprmsd_atom_indices, np.int)
    assignments = Serializer.LoadData(args.assignments)
    project = Project.load_from_hdf(args.project)

    if args.lprmsd_permute_atoms == 'None':
        permute_indices = None
    else:
        permute_indices = ReadPermFile(args.lprmsd_permute_atoms)

    if args.lprmsd_alt_indices == 'None':
        alt_indices = None
    else:
        alt_indices = np.loadtxt(args.lprmsd_alt_indices, np.int)

    run(project, assignments, args.conformations_per_state, args.states,
        args.output_dir, args.generators, atom_indices, permute_indices,
        alt_indices, args.total_memory_gb)