# NOTE(review): whitespace-mangled fragment of an argparse-driven __main__ script
# (Python 2 -- bare `print` statements). It begins with a stray `)` closing an
# add_argument(...) call whose start lies outside this chunk, and ends with a
# truncated `run(...)` call, so the fragment cannot run as written. It parses
# CLI args, loads LPRMSD atom/permutation/alt index files with np.loadtxt /
# ReadPermFile, loads assignments (Serializer.LoadData) and the project
# (Project.load_from_hdf), then hands everything to run(). A near-duplicate,
# complete copy of this block follows immediately below -- presumably an
# extraction artifact; confirm against the original repository before editing.
) parser.add_argument("output_dir", default="PDBs") args = parser.parse_args() if -1 in args.states: print "Ripping PDBs for all states" args.states = "all" if args.conformations_per_state == -1: print "Getting all PDBs for each state" args.conformations_per_state = "all" atom_indices = np.loadtxt(args.lprmsd_atom_indices, np.int) assignments = Serializer.LoadData(args.assignments) project = Project.load_from_hdf(args.project) if args.lprmsd_permute_atoms == "None": permute_indices = None else: permute_indices = ReadPermFile(args.lprmsd_permute_atoms) if args.lprmsd_alt_indices == "None": alt_indices = None else: alt_indices = np.loadtxt(args.lprmsd_alt_indices, np.int) run( project, assignments, args.conformations_per_state,
# NOTE(review): second, complete copy of the mangled __main__ fragment above
# (single-quoted variant). It also starts mid-statement -- `default='Data/Gens.lh5')`
# is the tail of an add_argument(...) call whose opening lies outside this chunk --
# so only the trailing run(...) call distinguishes it from the first copy. The full
# call site shows run()'s expected arguments: (project, assignments,
# conformations_per_state, states, output_dir, generators, atom_indices,
# permute_indices, alt_indices, total_memory_gb). Sentinel convention: the string
# 'None' in an index-file argument means "no file"; -1 in args.states /
# args.conformations_per_state means "all". np.int is removed in NumPy >= 1.24 --
# would need np.int_ or plain int if this code were revived.
default='Data/Gens.lh5') parser.add_argument('output_dir', default='PDBs') args = parser.parse_args() if -1 in args.states: print "Ripping PDBs for all states" args.states = 'all' if args.conformations_per_state == -1: print "Getting all PDBs for each state" args.conformations_per_state = 'all' atom_indices = np.loadtxt(args.lprmsd_atom_indices, np.int) assignments = Serializer.LoadData(args.assignments) project = Project.load_from_hdf(args.project) if args.lprmsd_permute_atoms == 'None': permute_indices = None else: permute_indices = ReadPermFile(args.lprmsd_permute_atoms) if args.lprmsd_alt_indices == 'None': alt_indices = None else: alt_indices = np.loadtxt(args.lprmsd_alt_indices, np.int) run(project, assignments, args.conformations_per_state, args.states, args.output_dir, args.generators, atom_indices, permute_indices, alt_indices, args.total_memory_gb)
def main(args, metric, logger):
    """Assign project trajectories to cluster generators in parallel.

    Fans chunks of the project's trajectories ("vtrajs") out to IPython.parallel
    workers, which assign each frame to its nearest generator under `metric`.
    Results are streamed back and saved incrementally, so an interrupted run can
    be resumed (only chunks not yet marked completed are resubmitted).

    Parameters
    ----------
    args : argparse.Namespace
        Needs .project, .generators, .output_dir, .profile, .cluster_id,
        .chunk_size (per the attribute accesses below).
    metric : object
        Distance metric, shipped to the workers alongside each job.
    logger : logging.Logger
        Progress/status sink.

    Side effects: writes assignment/distance containers under `output_dir`;
    calls sys.exit(1) if no IPython controller is reachable.
    """
    project = Project.load_from_hdf(args.project)
    if not os.path.exists(args.generators):
        raise IOError('Could not open generators')
    generators = os.path.abspath(args.generators)
    output_dir = os.path.abspath(args.output_dir)

    # Connect to the IPython.parallel controller; a short timeout turns
    # "no controller running" into an actionable message instead of a hang.
    try:
        json_file = client_json_file(args.profile, args.cluster_id)
        client = parallel.Client(json_file, timeout=2)
    except parallel.error.TimeoutError as exception:
        msg = '\nparallel.error.TimeoutError: ' + str(exception)
        msg += "\n\nPerhaps you didn't start a controller?\n"
        msg += "(hint, use ipcluster start)"
        print >> sys.stderr, msg
        sys.exit(1)

    lview = client.load_balanced_view()

    # Partition the project's frames into a bunch of vtrajs (work chunks).
    all_vtrajs = local.partition(project, args.chunk_size)

    # Initialize the on-disk containers the results are saved into.
    f_assignments, f_distances = local.setup_containers(
        output_dir, project, all_vtrajs)

    # Resume support: only resubmit chunks not yet flagged completed.
    # (`== False` is an elementwise NumPy comparison, not an identity test.)
    valid_indices = np.where(
        f_assignments.root.completed_vtrajs[:] == False)[0]
    remaining_vtrajs = np.array(all_vtrajs)[valid_indices].tolist()

    logger.info('%d/%d jobs remaining', len(remaining_vtrajs), len(all_vtrajs))

    # send the workers the files they need to get started
    # dview.apply_sync(remote.load_gens, generators, project['ConfFilename'],
    # metric)

    # Submit every remaining chunk; chunksize=1 keeps the load balancer free
    # to hand out jobs one at a time.
    n_jobs = len(remaining_vtrajs)
    amr = lview.map(remote.assign, remaining_vtrajs,
                    [generators] * n_jobs, [metric] * n_jobs, chunksize=1)

    # Harvest results as they finish (rather than blocking on the whole map),
    # saving each chunk to disk immediately so progress survives a crash.
    pending = set(amr.msg_ids)
    while pending:
        client.wait(pending, 1e-3)
        # finished is the set of msg_ids that are complete
        finished = pending.difference(client.outstanding)
        # update pending to exclude those that just finished
        pending = pending.difference(finished)
        for msg_id in finished:
            # We know these are done, so don't worry about blocking.
            # Renamed from `async`: that identifier is a reserved word from
            # Python 3.7 and made the module unimportable there.
            async_result = client.get_result(msg_id)
            assignments, distances, chunk = async_result.result[0]
            vtraj_id = local.save(f_assignments, f_distances, assignments,
                                  distances, chunk)
            log_status(logger, len(pending), n_jobs, vtraj_id, async_result)

    f_assignments.close()
    f_distances.close()
    logger.info('All done, exiting.')