def setup(self):
    """Build a one-trajectory Project over the bundled fixture files."""
    self.metric = metrics.Dihedral()
    self.pdb_fn = os.path.join(fixtures_dir(), 'native.pdb')
    self.trj_fn = os.path.join(fixtures_dir(), 'trj0.lh5')
    records = {
        'traj_lengths': [501],
        'traj_paths': [self.trj_fn],
        'conf_filename': self.pdb_fn,
        'traj_converted_from': [None],
        'traj_errors': [None],
    }
    self.project = Project(records)
    # chunk_size equal to the trajectory length -> a single vtraj
    self.vtraj = partition(self.project, chunk_size=501)[0]
def setup(self):
    """Create a single-trajectory Project from the test fixtures."""
    self.metric = metrics.Dihedral()
    self.pdb_fn = os.path.join(fixtures_dir(), 'native.pdb')
    self.trj_fn = os.path.join(fixtures_dir(), 'trj0.lh5')
    project_record = {
        'NumTrajs': 1,
        'TrajLengths': [501],
        'TrajFileBaseName': 'trj',
        'TrajFileType': '.lh5',
        'ConfFilename': self.pdb_fn,
        'TrajFilePath': fixtures_dir(),
    }
    self.project = Project(project_record)
    # the whole 501-frame trajectory fits in one chunk
    self.vtraj = partition(self.project, chunk_size=501)[0]
def test_partition_1():
    """Three trajectories of lengths 2, 1, 10 chunked 4 frames at a time."""
    project = Project([2, 1, 10], 3)
    chunks = partition(project, 4)
    expected = [
        [(0, 0, 2), (1, 0, 1), (2, 0, 1)],
        [(2, 1, 5)],
        [(2, 5, 9)],
        [(2, 9, 10)],
    ]
    assert expected == [vt.canonical() for vt in chunks]
    # the chunks must cover every frame exactly once
    assert sum(len(vt) for vt in chunks) == sum(project.traj_lengths)
def test_partition_0():
    """Two trajectories (2 and 5 frames) chunked 3 frames at a time."""
    project = {'TrajLengths': [2, 5]}
    chunks = partition(project, 3)
    expected = [
        [(0, 0, 2), (1, 0, 1)],
        [(1, 1, 4)],
        [(1, 4, 5)],
    ]
    assert expected == [vt.canonical() for vt in chunks]
    # all frames accounted for, none duplicated
    assert sum(len(vt) for vt in chunks) == sum(project['TrajLengths'])
def test_partition_0():
    """partition() splits a 2-frame and a 5-frame trajectory into 3-frame chunks."""
    project = Project([2, 5], 2)
    result = partition(project, 3)
    canonical = [chunk.canonical() for chunk in result]
    assert canonical == [[(0, 0, 2), (1, 0, 1)], [(1, 1, 4)], [(1, 4, 5)]]
    # total frame count is preserved by the partition
    assert sum(len(chunk) for chunk in result) == sum(project.traj_lengths)
def test_partition_1():
    """Chunking [2, 1, 10] with chunk_size 4 packs short trajs together."""
    proj = {'TrajLengths': [2, 1, 10]}
    result = partition(proj, 4)
    canonical = [chunk.canonical() for chunk in result]
    assert canonical == [
        [(0, 0, 2), (1, 0, 1), (2, 0, 1)],
        [(2, 1, 5)],
        [(2, 5, 9)],
        [(2, 9, 10)],
    ]
    # no frames are lost across chunks
    assert sum(len(chunk) for chunk in result) == sum(proj['TrajLengths'])
def setup(self):
    """Prepare a Dihedral metric and a one-trajectory project fixture."""
    self.metric = metrics.Dihedral()
    self.pdb_fn = os.path.join(fixtures_dir(), 'native.pdb')
    self.trj_fn = os.path.join(fixtures_dir(), 'trj0.lh5')
    self.project = Project(dict(
        NumTrajs=1,
        TrajLengths=[501],
        TrajFileBaseName='trj',
        TrajFileType='.lh5',
        ConfFilename=self.pdb_fn,
        TrajFilePath=fixtures_dir(),
    ))
    # partition into 501-frame chunks: the whole trajectory is one vtraj
    self.vtraj = partition(self.project, chunk_size=501)[0]
def test_2(self):
    """Assign a second 10-frame chunk against the PDB structure alone."""
    # reset the global
    remote.PREPARED = False

    # take a smaller vtraj — the second 10-frame chunk — and assign it
    # against only the PDB
    vtraj = partition(self.project, chunk_size=10)[1]
    a, d, vtraj = assign(vtraj, self.pdb_fn, self.metric)

    # reference RMSD distances for these frames
    correct_d = np.array([0.26932446, 0.53129266, 0.64795935, 1.56435365,
                          1.05962805, 0.60572095, 0.47062515, 0.5758602,
                          0.24565975, 0.69161412], dtype=np.float32)
    npt.assert_array_almost_equal(d, correct_d)
    # with a single reference structure, every frame maps to state 0
    npt.assert_array_equal(a, np.zeros(10))
def test_partition_0():
    """A 2-frame and a 5-frame trajectory split into 3-frame vtrajs."""
    project = Project([2, 5], 2)
    got = partition(project, 3)
    assert [c.canonical() for c in got] == [
        [(0, 0, 2), (1, 0, 1)],
        [(1, 1, 4)],
        [(1, 4, 5)],
    ]
    # frame conservation check
    assert sum(len(c) for c in got) == sum(project.traj_lengths)
def test_partition_0():
    """Dict-backed project: lengths [2, 5] partitioned with chunk_size 3."""
    proj = {'TrajLengths': [2, 5]}
    result = partition(proj, 3)
    expected = [
        [(0, 0, 2), (1, 0, 1)],
        [(1, 1, 4)],
        [(1, 4, 5)],
    ]
    assert expected == [chunk.canonical() for chunk in result]
    # every frame appears in exactly one chunk
    assert sum(proj['TrajLengths']) == sum(len(chunk) for chunk in result)
def test_partition_3():
    """Exercise partition on a project with a zero-length trajectory.

    No assertion is made on the result; the test's pass/fail behavior
    (plain run vs. expected exception) is defined by the test framework.
    """
    proj = {'TrajLengths': [1, 0]}
    partition(proj, 1)
def test_partition_2():
    """Exercise partition with a negative trajectory length (unchecked)."""
    proj = Project([1, 1, -1], 3)
    partition(proj, 1)
def setup(self):
    """Make a scratch directory plus assignment/distance containers."""
    # two short trajectories, partitioned into 3-frame vtrajs
    proj = {'NumTrajs': 2, 'TrajLengths': [9, 10]}
    self.d = tempfile.mkdtemp()
    self.vtrajs = partition(proj, 3)
    self.fa, self.fd = setup_containers(self.d, proj, self.vtrajs)
def test_partition_4():
    """A chunk_size larger than the only trajectory yields a single chunk."""
    chunks = partition({'TrajLengths': [1]}, 11)
    assert [c.canonical() for c in chunks] == [[(0, 0, 1)]]
def test_partition_3():
    """Exercise partition with a non-integer trajectory length (unchecked)."""
    proj = Project([1, 1.5, 1], 3)
    partition(proj, 1)
def test_partition_4():
    """chunk_size bigger than the lone trajectory -> one 1-frame chunk."""
    proj = Project([1], 1)
    chunks = partition(proj, 11)
    assert [c.canonical() for c in chunks] == [[(0, 0, 1)]]
def test_partition_3():
    """Feed a fractional trajectory length through partition (no asserts)."""
    fractional = Project([1, 1.5, 1], 3)
    partition(fractional, 1)
def test_partition_3():
    """Run partition over a project whose second trajectory is empty."""
    proj = Project([1, 0], 2)
    partition(proj, 1)
def test_partition_4():
    """An oversized chunk_size still produces exactly one chunk."""
    result = partition(Project([1], 1), 11)
    assert [[(0, 0, 1)]] == [vt.canonical() for vt in result]
def setup(self):
    """Build containers in a fresh temp dir for a 9+10 frame project."""
    self.d = tempfile.mkdtemp()
    # partition into vtrajs of at most 3 frames each
    proj = Project([9, 10], 2)
    self.vtrajs = partition(proj, 3)
    self.fa, self.fd = setup_containers(self.d, proj, self.vtrajs)
def main(args, metric, logger):
    """Farm assignment of trajectory chunks out to an IPython.parallel cluster.

    Loads the project, partitions its trajectories into virtual-trajectory
    chunks, maps ``remote.assign`` over the not-yet-completed chunks on a
    load-balanced view, and saves each result into on-disk assignment and
    distance containers as it arrives.

    NOTE(review): this is Python 2 code (``print >>`` statement, ``async``
    used as an identifier); it will not parse under Python 3.

    Parameters
    ----------
    args : argparse-style namespace; reads ``project``, ``generators``,
        ``output_dir``, ``profile``, ``cluster_id`` and ``chunk_size``.
    metric : distance metric object forwarded unchanged to the workers.
    logger : logging.Logger used for progress reporting.
    """
    project = Project.load_from(args.project)
    if not os.path.exists(args.generators):
        raise IOError("Could not open generators")
    generators = os.path.abspath(args.generators)
    output_dir = os.path.abspath(args.output_dir)

    # connect to the workers
    try:
        json_file = client_json_file(args.profile, args.cluster_id)
        client = parallel.Client(json_file, timeout=2)
    except parallel.error.TimeoutError as exception:
        # a timeout almost always means no controller is running;
        # print a hint and bail out with a nonzero exit code
        msg = "\nparallel.error.TimeoutError: " + str(exception)
        msg += "\n\nPerhaps you didn't start a controller?\n"
        msg += "(hint, use ipcluster start)"
        print >> sys.stderr, msg
        sys.exit(1)

    lview = client.load_balanced_view()

    # partition the frames into a bunch of vtrajs
    all_vtrajs = local.partition(project, args.chunk_size)

    # initialze the containers to save to disk
    f_assignments, f_distances = local.setup_containers(output_dir,
        project, all_vtrajs)

    # get the chunks that have not been computed yet, so a previously
    # interrupted run can be resumed without redoing finished work
    valid_indices = np.where(f_assignments.root.completed_vtrajs[:] == False)[0]
    remaining_vtrajs = np.array(all_vtrajs)[valid_indices].tolist()

    logger.info("%d/%d jobs remaining", len(remaining_vtrajs), len(all_vtrajs))

    # send the workers the files they need to get started
    # dview.apply_sync(remote.load_gens, generators, project['ConfFilename'],
    #                  metric)

    # get the workers going: one task per remaining vtraj
    n_jobs = len(remaining_vtrajs)
    amr = lview.map(remote.assign, remaining_vtrajs,
                    [generators] * n_jobs, [metric] * n_jobs,
                    chunksize=1)

    pending = set(amr.msg_ids)

    while pending:
        # NOTE(review): 1e-3 timeout makes this a tight polling loop
        client.wait(pending, 1e-3)
        # finished is the set of msg_ids that are complete
        finished = pending.difference(client.outstanding)
        # update pending to exclude those that just finished
        pending = pending.difference(finished)
        for msg_id in finished:
            # we know these are done, so don't worry about blocking
            async = client.get_result(msg_id)
            try:
                assignments, distances, chunk = async.result[0]
            except RemoteError as e:
                # surface the worker-side traceback before re-raising
                print "Remote Error:"
                e.print_traceback()
                raise
            # persist this chunk's results immediately
            vtraj_id = local.save(f_assignments, f_distances,
                                  assignments, distances, chunk)
            log_status(logger, len(pending), n_jobs, vtraj_id, async)

    f_assignments.close()
    f_distances.close()
    logger.info("All done, exiting.")
def test_partition_2():
    """Smoke-test partition on a project with a negative length entry."""
    partition(Project([1, 1, -1], 3), 1)
def test_partition_4():
    """One 1-frame trajectory with a huge chunk size -> a single chunk."""
    proj = {'TrajLengths': [1]}
    result = partition(proj, 11)
    assert [vt.canonical() for vt in result] == [[(0, 0, 1)]]
def test_partition_3():
    """Partition a project whose second trajectory has zero frames."""
    partition(Project([1, 0], 2), 1)
def setup(self):
    """Temp-dir fixtures: vtrajs and containers for two short trajectories."""
    self.d = tempfile.mkdtemp()
    proj = {'TrajLengths': [9, 10], 'NumTrajs': 2}
    # 3-frame chunks over 9 + 10 frames
    self.vtrajs = partition(proj, 3)
    self.fa, self.fd = setup_containers(self.d, proj, self.vtrajs)
def setup(self):
    """Create a scratch directory and the on-disk result containers."""
    self.d = tempfile.mkdtemp()
    two_traj_project = Project([9, 10], 2)
    self.vtrajs = partition(two_traj_project, 3)
    self.fa, self.fd = setup_containers(self.d, two_traj_project, self.vtrajs)
def main(args, metric, logger):
    """Distribute assignment of trajectory chunks over an IPython.parallel cluster.

    Loads the project from HDF, partitions it into virtual-trajectory
    chunks, maps ``remote.assign`` across the chunks that are not yet
    marked complete, and writes each incoming result into the on-disk
    assignment/distance containers.

    NOTE(review): Python 2 code (``print >>`` statement, ``async``
    identifier); will not parse under Python 3.

    Parameters
    ----------
    args : argparse-style namespace; reads ``project``, ``generators``,
        ``output_dir``, ``profile``, ``cluster_id`` and ``chunk_size``.
    metric : distance metric object forwarded unchanged to the workers.
    logger : logging.Logger used for progress reporting.
    """
    project = Project.load_from_hdf(args.project)
    if not os.path.exists(args.generators):
        raise IOError('Could not open generators')
    generators = os.path.abspath(args.generators)
    output_dir = os.path.abspath(args.output_dir)

    # connect to the workers
    try:
        json_file = client_json_file(args.profile, args.cluster_id)
        client = parallel.Client(json_file, timeout=2)
    except parallel.error.TimeoutError as exception:
        # a timeout usually means no controller is running; hint and exit
        msg = '\nparallel.error.TimeoutError: ' + str(exception)
        msg += "\n\nPerhaps you didn't start a controller?\n"
        msg += "(hint, use ipcluster start)"
        print >> sys.stderr, msg
        sys.exit(1)

    lview = client.load_balanced_view()

    # partition the frames into a bunch of vtrajs
    all_vtrajs = local.partition(project, args.chunk_size)

    # initialze the containers to save to disk
    f_assignments, f_distances = local.setup_containers(
        output_dir, project, all_vtrajs)

    # get the chunks that have not been computed yet, so an interrupted
    # run resumes without redoing finished work
    valid_indices = np.where(
        f_assignments.root.completed_vtrajs[:] == False)[0]
    remaining_vtrajs = np.array(all_vtrajs)[valid_indices].tolist()

    logger.info('%d/%d jobs remaining', len(remaining_vtrajs), len(all_vtrajs))

    # send the workers the files they need to get started
    # dview.apply_sync(remote.load_gens, generators, project['ConfFilename'],
    #                  metric)

    # get the workers going: one task per remaining vtraj
    n_jobs = len(remaining_vtrajs)
    amr = lview.map(remote.assign, remaining_vtrajs,
                    [generators] * n_jobs, [metric] * n_jobs,
                    chunksize=1)

    pending = set(amr.msg_ids)

    while pending:
        # NOTE(review): 1e-3 timeout makes this a tight polling loop
        client.wait(pending, 1e-3)
        # finished is the set of msg_ids that are complete
        finished = pending.difference(client.outstanding)
        # update pending to exclude those that just finished
        pending = pending.difference(finished)
        for msg_id in finished:
            # we know these are done, so don't worry about blocking
            async = client.get_result(msg_id)
            assignments, distances, chunk = async .result[0]
            # persist this chunk's results immediately
            vtraj_id = local.save(f_assignments, f_distances,
                                  assignments, distances, chunk)
            log_status(logger, len(pending), n_jobs, vtraj_id, async)

    f_assignments.close()
    f_distances.close()
    logger.info('All done, exiting.')