Example #1
def run_LY99_batch(test_without_mpi=False):
    params, options = parse_input()
    log_by_rank = bool(int(os.environ.get("LOG_BY_RANK", 0)))
    rank_profile = bool(int(os.environ.get("RANK_PROFILE", 1)))
    if log_by_rank:
        import io, sys
    if rank_profile:
        import cProfile
        pr = cProfile.Profile()
        pr.enable()

    if test_without_mpi:
        from LS49.adse13_196.mock_mpi import mpiEmulator
        MPI = mpiEmulator()
    else:
        from libtbx.mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    import omptbx
    workaround_nt = int(os.environ.get("OMP_NUM_THREADS", 1))
    omptbx.omp_set_num_threads(workaround_nt)
    N_total = int(os.environ["N_SIM"])  # number of items to simulate
    N_stride = size  # total number of worker tasks
    print("hello from rank %d of %d" % (rank, size), "with omp_threads=",
          omp_get_num_procs())
    import datetime
    start_comp = time()

    # now inside the Python imports, begin energy channel calculation

    wavelength_A = 1.74  # general ballpark X-ray wavelength in Angstroms
    wavlen = flex.double([12398.425 / (7070.5 + w) for w in range(100)])
    direct_algo_res_limit = 1.7

    local_data = data()  # later put this through broadcast

    GF = gen_fmodel(resolution=direct_algo_res_limit,
                    pdb_text=local_data.get("pdb_lines"),
                    algorithm="fft",
                    wavelength=wavelength_A)
    GF.set_k_sol(0.435)
    GF.make_P1_primitive()

    # Generating sf for my wavelengths
    sfall_channels = {}
    for x in range(len(wavlen)):
        if rank > len(wavlen): break
        if x % size != rank: continue

        GF.reset_wavelength(wavlen[x])
        GF.reset_specific_at_wavelength(
            label_has="FE1",
            tables=local_data.get("Fe_oxidized_model"),
            newvalue=wavlen[x])
        GF.reset_specific_at_wavelength(
            label_has="FE2",
            tables=local_data.get("Fe_reduced_model"),
            newvalue=wavlen[x])
        sfall_channels[x] = GF.get_amplitudes()

    reports = comm.gather(sfall_channels, root=0)
    if rank == 0:
        sfall_channels = {}
        for report in reports:
            sfall_channels.update(report)
    comm.barrier()

    print(
        rank, time(),
        "finished with the calculation of channels, now construct single broadcast"
    )

    if rank == 0:
        print("Rank 0 time", datetime.datetime.now())
        from LS49.spectra.generate_spectra import spectra_simulation
        from LS49.adse13_196.revapi.LY99_pad import microcrystal
        print("hello2 from rank %d of %d" % (rank, size))
        SS = spectra_simulation()
        C = microcrystal(
            Deff_A=4000, length_um=4.,
            beam_diameter_um=1.0)  # assume smaller than 10 um crystals
        from LS49 import legacy_random_orientations
        random_orientations = legacy_random_orientations(N_total)
        transmitted_info = dict(spectra=SS,
                                crystal=C,
                                sfall_info=sfall_channels,
                                random_orientations=random_orientations)
    else:
        transmitted_info = None
    transmitted_info = comm.bcast(transmitted_info, root=0)
    comm.barrier()
    parcels = list(range(rank, N_total, N_stride))

    print(rank, time(),
          "finished with single broadcast, now set up the rank logger")

    if log_by_rank:
        expand_dir = os.path.expandvars(params.logger.outdir)
        log_path = os.path.join(expand_dir, "rank_%d.log" % rank)
        error_path = os.path.join(expand_dir, "rank_%d.err" % rank)
        #print("Rank %d redirecting stdout/stderr to"%rank, log_path, error_path)
        sys.stdout = io.TextIOWrapper(open(log_path, 'ab', 0),
                                      write_through=True)
        sys.stderr = io.TextIOWrapper(open(error_path, 'ab', 0),
                                      write_through=True)

    print(
        rank, time(),
        "finished with the rank logger, now construct the GPU cache container")

    import random
    gpu_instance = get_exascale("gpu_instance", params.context)
    gpu_energy_channels = get_exascale("gpu_energy_channels", params.context)

    gpu_run = gpu_instance(
        deviceId=rank % int(os.environ.get("DEVICES_PER_NODE", 1)))
    gpu_channels_singleton = gpu_energy_channels(
        deviceId=gpu_run.get_deviceID())
    # singleton will instantiate, regardless of gpu, device count, or exascale API

    comm.barrier()
    while len(parcels) > 0:
        idx = random.choice(parcels)
        cache_time = time()
        print("idx------start-------->", idx, "rank", rank, time())
        # if rank==0: os.system("nvidia-smi")
        tst_one(
            image=idx,
            spectra=transmitted_info["spectra"],
            crystal=transmitted_info["crystal"],
            random_orientation=transmitted_info["random_orientations"][idx],
            sfall_channels=transmitted_info["sfall_info"],
            gpu_channels_singleton=gpu_channels_singleton,
            rank=rank,
            params=params)
        parcels.remove(idx)
        print("idx------finis-------->", idx, "rank", rank, time(), "elapsed",
              time() - cache_time)
    comm.barrier()
    del gpu_channels_singleton
    # avoid the Kokkos allocation "device_Fhkl" being deallocated after Kokkos::finalize is called
    print("Overall rank", rank, "at", datetime.datetime.now(),
          "seconds elapsed after srun startup %.3f" % (time() - start_elapse))
    print("Overall rank", rank, "at", datetime.datetime.now(),
          "seconds elapsed after Python imports %.3f" % (time() - start_comp))
    if rank_profile:
        pr.disable()
        pr.dump_stats("cpu_%d.prof" % rank)
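
Note on Example #1: the per-channel structure factors are split across ranks round-robin (x % size == rank) and merged on rank 0 with a single gather before the broadcast. Below is a minimal sketch of that pattern only, assuming plain mpi4py and a hypothetical compute_channel() standing in for the GF.reset_*/get_amplitudes calls above; it is not part of the LS49 code.

from mpi4py import MPI

def gather_channels(n_channels, compute_channel):
    comm = MPI.COMM_WORLD
    rank, size = comm.Get_rank(), comm.Get_size()
    # Round-robin split: rank r computes channels r, r+size, r+2*size, ...
    local = {x: compute_channel(x) for x in range(n_channels) if x % size == rank}
    # Collect every rank's partial dict on rank 0 and merge into one mapping.
    reports = comm.gather(local, root=0)
    merged = {}
    if rank == 0:
        for report in reports:
            merged.update(report)
    return merged  # complete only on rank 0
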
Example #2
def run_batch_job(test_without_mpi=False):
  params,options = parse_input()
  if params.log.by_rank:
    import io, sys
  if params.log.rank_profile:
    import cProfile
    pr = cProfile.Profile()
    pr.enable()

  # workaround for getting master nexus
  os.environ["NXMX_LOCAL_DATA"] = params.nxmx_local_data
  if test_without_mpi or params.test_without_mpi:
    from LS49.adse13_196.mock_mpi import mpiEmulator
    MPI = mpiEmulator()
  else:
    from libtbx.mpi4py import MPI

  comm = MPI.COMM_WORLD
  rank = comm.Get_rank()
  size = comm.Get_size()
  import omptbx
  workaround_nt = int(os.environ.get("OMP_NUM_THREADS",1))
  omptbx.omp_set_num_threads(workaround_nt)
  N_stride = size # total number of worker tasks
  print("hello from rank %d of %d"%(rank,size),"with omp_threads=",omp_get_num_procs())
  import datetime
  start_comp = time()

  print(rank, time(),
    "finished with the calculation of channels, now construct single broadcast")

  if rank == 0:
    print("Rank 0 time", datetime.datetime.now())

    spectrum_dict = {}

    from iotbx.reflection_file_reader import any_reflection_file
    from LS49 import ls49_big_data
    merge_file = os.path.join(ls49_big_data,"adse13_228","cyto_init_merge.mtz")
    Fmerge = any_reflection_file(merge_file).as_miller_arrays()[0].as_amplitude_array()

    print("Fmerge min/max = %f / %f" % (min(Fmerge.data()), max(Fmerge.data())))

    transmitted_info = dict(spectra = spectrum_dict,
                            amplitudes = Fmerge,
                            )
  else:
    transmitted_info = None
  transmitted_info = comm.bcast(transmitted_info, root = 0)
  comm.barrier()
  parcels = list(range(rank,params.N_total,N_stride))

  print(rank, time(), "finished with single broadcast, now set up the rank logger")

  if params.log.by_rank:
    expand_dir = os.path.expandvars(params.log.outdir)
    os.makedirs(expand_dir, exist_ok=True)
    log_path = os.path.join(expand_dir,"rank_%d.log"%rank)
    error_path = os.path.join(expand_dir,"rank_%d.err"%rank)
    #print("Rank %d redirecting stdout/stderr to"%rank, log_path, error_path)
    sys.stdout = io.TextIOWrapper(open(log_path,'ab', 0), write_through=True)
    sys.stderr = io.TextIOWrapper(open(error_path,'ab', 0), write_through=True)

  print(rank, time(), "finished with the rank logger, now construct the GPU cache container")

  try:
    from simtbx.gpu import gpu_energy_channels
    gpu_channels_singleton = gpu_energy_channels(
      deviceId = rank % params.devices_per_node)
    # singleton will instantiate, regardless of cuda, device count, or exascale API
  except ImportError:
    gpu_channels_singleton = None
  comm.barrier()
  import random
  while len(parcels)>0:
    idx = random.choice(parcels)
    cache_time = time()
    print("idx------start-------->",idx,"rank",rank,time())
    # if rank==0: os.system("nvidia-smi")
    tst_one(i_exp=idx,spectra=transmitted_info["spectra"],
        Fmerge=transmitted_info["amplitudes"],
        gpu_channels_singleton=gpu_channels_singleton,
        rank=rank,params=params
    )
    parcels.remove(idx)
    print("idx------finis-------->",idx,"rank",rank,time(),"elapsed",time()-cache_time)
  comm.barrier()
  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after srun startup %.3f"%(time()-start_elapse))
  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after Python imports %.3f"%(time()-start_comp))
  if params.log.rank_profile:
    pr.disable()
    pr.dump_stats("cpu_%d.prof"%rank)
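
Note on Example #2: work items are decomposed statically (rank r owns indices r, r+N_stride, r+2*N_stride, ...) and each rank drains its own parcel list in random order, which staggers GPU and filesystem load across ranks. A minimal sketch of that loop follows, assuming a hypothetical process_one() in place of tst_one and rank/size taken from the surrounding MPI context.

import random

def run_parcels(rank, size, n_total, process_one):
    # Static decomposition: rank r owns items r, r+size, r+2*size, ...
    parcels = list(range(rank, n_total, size))
    while parcels:
        # Random order within the rank's own slice staggers contention.
        idx = random.choice(parcels)
        process_one(idx)
        parcels.remove(idx)
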
Example #3
def run_batch_job(test_without_mpi=False):
  from LS49.adse13_187.cyto_batch import parse_input as cyto_batch_parse_input
  params,options = cyto_batch_parse_input()
  if params.log.by_rank:
    import io, sys
  if params.log.rank_profile:
    import cProfile
    pr = cProfile.Profile()
    pr.enable()

  if test_without_mpi or params.test_without_mpi:
    from LS49.adse13_196.mock_mpi import mpiEmulator
    MPI = mpiEmulator()
  else:
    from libtbx.mpi4py import MPI

  comm = MPI.COMM_WORLD
  rank = comm.Get_rank()
  size = comm.Get_size()
  import omptbx
  workaround_nt = int(os.environ.get("OMP_NUM_THREADS",1))
  omptbx.omp_set_num_threads(workaround_nt)
  N_stride = size # total number of worker tasks
  print("hello from rank %d of %d"%(rank,size),"with omp_threads=",omp_get_num_procs())
  import datetime
  start_comp = time()

  if params.log.by_rank:
    expand_dir = os.path.expandvars(params.log.outdir)
    os.makedirs(expand_dir, exist_ok=True)
    log_path = os.path.join(expand_dir,"rank_%d.log"%rank)
    error_path = os.path.join(expand_dir,"rank_%d.err"%rank)
    #print("Rank %d redirecting stdout/stderr to"%rank, log_path, error_path)
    sys.stdout = io.TextIOWrapper(open(log_path,'ab', 0), write_through=True)
    sys.stderr = io.TextIOWrapper(open(error_path,'ab', 0), write_through=True)

  print(rank, time(), "finished with the rank logger, now delgate parcels")
  os.environ["CCTBX_RECOMMEND_DEVICE"] = "%d"%(rank % int(os.environ.get("CCTBX_DEVICE_PER_NODE",1)))
  print("rank", rank, "device", os.environ["CCTBX_RECOMMEND_DEVICE"])
  N_start = int(os.environ.get("N_START",0))

  comm.barrier()
  if rank == 0:
    os.system("nvidia-smi")
    # client process (requests all the work)
    import random
    parcels = list(range(N_start,N_start + params.N_total))
    while len(parcels) > 0:
      idx = parcels[0]    # random.choice(parcels)
      rankreq = comm.recv(source = MPI.ANY_SOURCE)
      print("Sending parcel",idx,"to rank",rankreq)
      comm.send(idx,dest = rankreq)
      parcels.remove(idx)
    # finally send a stop command to each worker rank
    for _ in range(size-1):
      rankreq = comm.recv(source=MPI.ANY_SOURCE)
      comm.send('endrun',dest=rankreq)
  else:
    # server process (does all the work)
    while True:
      # inform the client this worker is ready for an event
      comm.send(rank,dest=0)
      idx = comm.recv(source=0)
      if idx == 'endrun':
        break
      cache_time = time()
      print("idx------start-------->",idx,"rank",rank,time())
      thin_ds1(idx,frame_params=params)
      print("idx------finis-------->",idx,
            "rank",rank,time(),"elapsed %.3fs"%(time()-cache_time))
  comm.barrier()

  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after srun startup %.3f"%(time()-start_elapse))
  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after Python imports %.3f"%(time()-start_comp))
  if params.log.rank_profile:
    pr.disable()
    pr.dump_stats("cpu_%d.prof"%rank)
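
Note on Example #3: unlike the static decompositions in Examples #1 and #2, rank 0 acts as a dispatcher that hands out one index per incoming request and finishes by sending a stop token to each worker; workers announce readiness, then process until told to stop. A minimal sketch of that client/worker pattern follows, assuming plain mpi4py and a hypothetical do_work() in place of thin_ds1.

from mpi4py import MPI

def client_worker(n_items, do_work):
    comm = MPI.COMM_WORLD
    rank, size = comm.Get_rank(), comm.Get_size()
    if rank == 0:
        # Dispatcher: hand out one index per request, then one stop token per worker.
        for idx in range(n_items):
            worker = comm.recv(source=MPI.ANY_SOURCE)
            comm.send(idx, dest=worker)
        for _ in range(size - 1):
            worker = comm.recv(source=MPI.ANY_SOURCE)
            comm.send('endrun', dest=worker)
    else:
        # Worker: announce readiness, process until the stop token arrives.
        while True:
            comm.send(rank, dest=0)
            idx = comm.recv(source=0)
            if idx == 'endrun':
                break
            do_work(idx)
    comm.barrier()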