def gpu_background(self, override_source=2):
    """Compute two background images on the GPU and store them on ``self``.

    Before each background pass the detector buffer is scaled in place by a
    factor of 0.0 (presumably clearing whatever it held -- confirm against
    ``gpu_detector.scale_in_place``).

    Side effects:
      * ``self.SIM.device_Id`` is set to 0.
      * ``self.bg_multi`` receives the pixels produced by calling
        ``add_background`` with the detector only.
      * ``self.bg_single`` receives the pixels produced by calling
        ``add_background`` with ``override_source=override_source``.
      * ``self.SIM.raw_pixels`` is overwritten by each read-back.

    :param override_source: forwarded to the second ``add_background`` call.
    """
    from simtbx.gpu import exascale_api
    from simtbx.gpu import gpu_detector as gpud
    from simtbx.gpu import gpu_energy_channels

    sim = self.SIM

    # Not used below, but bound to a local so the object stays alive until
    # this method returns.
    channels = gpu_energy_channels(deviceId=0)
    sim.device_Id = 0

    # allocate GPU arrays
    simulation = exascale_api(nanoBragg=sim)
    simulation.allocate()

    detector = gpud(deviceId=sim.device_Id, nanoBragg=sim)
    detector.each_image_allocate()

    def read_back():
        """Copy the GPU pixels into SIM.raw_pixels and return them as numpy."""
        detector.write_raw_pixels(sim)  # updates SIM.raw_pixels from GPU
        return sim.raw_pixels.as_numpy_array()

    zero_scale = 0.0

    # First pass: add_background called with the detector only.
    detector.scale_in_place(zero_scale)  # apply scale directly on GPU
    simulation.add_background(detector)
    self.bg_multi = read_back()

    # Second pass: add_background called with the requested override_source.
    detector.scale_in_place(zero_scale)
    simulation.add_background(detector=detector,
                              override_source=override_source)
    self.bg_single = read_back()

    detector.each_image_free()
def create_gpu_channels(cpu_channels, utilize):
    """Send per-channel structure factors to the GPU assigned to this MPI rank.

    The device id is ``rank % DEVICES_PER_NODE``, where ``DEVICES_PER_NODE``
    is read from the environment (default 1). When the ``gpu_energy_channels``
    object for that device already reports a non-zero channel count, nothing
    is uploaded and the function returns immediately.

    :param cpu_channels: container indexable by ``0 .. len(cpu_channels)-1``;
        each entry must offer ``indices()`` and ``data()``.
    :param utilize: expected channel count; asserted equal to
        ``len(cpu_channels)`` once an upload has been performed.
    """
    from libtbx.mpi4py import MPI
    world = MPI.COMM_WORLD
    rank = world.Get_rank()
    size = world.Get_size()

    per_node = int(os.environ.get("DEVICES_PER_NODE", 1))
    this_device = rank % per_node

    from simtbx.gpu import gpu_energy_channels
    channels = gpu_energy_channels(deviceId=this_device)
    assert channels.get_deviceID() == this_device
    print("QQQ to gpu %d channels" % channels.get_nchannels(), "rank", rank)

    if channels.get_nchannels() != 0:
        # already initialized: nothing to send
        return

    timer = Profiler("Initialize the channels singleton rank %d, device %d" %
                     (rank, this_device))
    # Index-based access on purpose: cpu_channels may be an int-keyed mapping,
    # in which case iterating it directly would yield keys, not channels.
    for channel_id in range(len(cpu_channels)):
        print("starting with ", channel_id)
        channel = cpu_channels[channel_id]
        channels.structure_factors_to_GPU_direct(
            channel_id, channel.indices(), channel.data())
    print("Finished sending to gpu %d channels" % channels.get_nchannels())
    del timer  # drop the profiler object now that the upload is done
    assert len(cpu_channels) == utilize
def get_amplitudes(self, dials_model, refl_table, test_without_mpi=True):
    """Derive amplitudes from the reflection table and create the GPU channels.

    ``self.amplitudes`` becomes a miller array (anomalous) whose indices are
    ``refl_table["miller_index"]`` and whose data are the square roots of
    ``refl_table["spots_mockup_shoebox_sum"]``, both reordered through
    ``refl_table["spots_order"]``. Symmetry comes from the unit cell and
    space group of ``dials_model.crystal``.

    ``self.gpu_channels_singleton`` is created on the device named by the
    ``CCTBX_RECOMMEND_DEVICE`` environment variable (default 0).

    :param dials_model: object exposing a ``crystal`` attribute.
    :param refl_table: reflection table with the three columns named above.
    :param test_without_mpi: accepted but not used in this method.
    """
    from cctbx.crystal import symmetry
    from cctbx.miller import array, set as miller_set

    xtal = dials_model.crystal
    unit_cell = xtal.get_unit_cell()
    space_group = xtal.get_space_group()
    xtal_symmetry = symmetry(unit_cell=unit_cell, space_group=space_group)

    order = refl_table["spots_order"]
    indices = refl_table["miller_index"].select(order)
    index_set = miller_set(xtal_symmetry, anomalous_flag=True, indices=indices)

    intensities = refl_table["spots_mockup_shoebox_sum"].select(order)
    self.amplitudes = array(index_set, data=flex.sqrt(intensities))

    from simtbx.gpu import gpu_energy_channels
    device = int(os.environ.get("CCTBX_RECOMMEND_DEVICE", 0))
    self.gpu_channels_singleton = gpu_energy_channels(deviceId=device)
def get_amplitudes(self, dials_model, refl_table, test_without_mpi=True):
    """Parse job parameters, derive amplitudes and create the GPU channels.

    ``self.params`` is filled from ``parse_input()``; the second value it
    returns is discarded.

    ``self.amplitudes`` becomes a miller array (anomalous) whose indices are
    ``refl_table["miller_index"]`` and whose data are the square roots of
    ``refl_table["spots_mockup_shoebox_sum"]``, both reordered through
    ``refl_table["spots_order"]``. Symmetry comes from the unit cell and
    space group of ``dials_model.crystal``.

    ``self.gpu_channels_singleton`` is always created on device 0 here.

    :param dials_model: object exposing a ``crystal`` attribute.
    :param refl_table: reflection table with the three columns named above.
    :param test_without_mpi: accepted but not used in this method.
    """
    from LS49.adse13_187.cyto_batch import parse_input
    self.params, _options = parse_input()

    from cctbx.crystal import symmetry
    from cctbx.miller import array, set as miller_set

    xtal = dials_model.crystal
    unit_cell = xtal.get_unit_cell()
    space_group = xtal.get_space_group()
    xtal_symmetry = symmetry(unit_cell=unit_cell, space_group=space_group)

    order = refl_table["spots_order"]
    indices = refl_table["miller_index"].select(order)
    index_set = miller_set(xtal_symmetry, anomalous_flag=True, indices=indices)

    intensities = refl_table["spots_mockup_shoebox_sum"].select(order)
    self.amplitudes = array(index_set, data=flex.sqrt(intensities))

    from simtbx.gpu import gpu_energy_channels
    # determine device by rank id later
    self.gpu_channels_singleton = gpu_energy_channels(deviceId=0)
def create_gpu_channels_one_rank(cpu_channels, utilize):
    """Send per-channel structure factors to GPU device 0 (no MPI involved).

    When the ``gpu_energy_channels`` object for device 0 already reports a
    non-zero channel count, nothing is uploaded and the function returns
    immediately.

    :param cpu_channels: container indexable by ``0 .. len(cpu_channels)-1``;
        each entry must offer ``indices()`` and ``data()``.
    :param utilize: expected channel count; asserted equal to
        ``len(cpu_channels)`` once an upload has been performed.
    """
    this_device = 0

    from simtbx.gpu import gpu_energy_channels
    channels = gpu_energy_channels(deviceId=this_device)
    assert channels.get_deviceID() == this_device
    print("QQQ to gpu %d channels" % channels.get_nchannels(), "one rank")

    if channels.get_nchannels() != 0:
        # already initialized: nothing to send
        return

    timer = Profiler(
        "Initialize the channels singleton rank None, device %d" %
        (this_device))
    # Index-based access on purpose: cpu_channels may be an int-keyed mapping,
    # in which case iterating it directly would yield keys, not channels.
    for channel_id in range(len(cpu_channels)):
        print("starting with ", channel_id)
        channel = cpu_channels[channel_id]
        channels.structure_factors_to_GPU_direct(
            channel_id, channel.indices(), channel.data())
    print("Finished sending to gpu %d channels" % channels.get_nchannels())
    del timer  # drop the profiler object now that the upload is done
    assert len(cpu_channels) == utilize
def modularized_exafel_api_for_GPU(self, argchk=False, cuda_background=True):
    """Run a polychromatic simulation through the CUDA exascale API.

    Steps, in order:
      1. Create the ``gpu_energy_channels`` object on device 0, assert it is
         empty, and upload one structure-factor channel per entry of
         ``self.flux`` from ``self.sfall_channels``.
      2. Configure a ``nanoBragg`` instance (panel 0) and allocate the GPU
         simulation and detector objects.
      3. Add every energy channel on the GPU, then scale the detector by 1.0.
      4. Add the water background either on the GPU (``cuda_background``
         true) or with ``SIM.add_background()`` after the GPU pixels have been
         written back and the per-image GPU arrays freed.

    :param argchk: select ``shapetype.Gauss_argchk`` instead of
        ``shapetype.Gauss`` for the crystal shape.
    :param cuda_background: choose the GPU background path (True) or the
        ``SIM.add_background()`` path (False).
    :returns: the configured ``nanoBragg`` instance.
    """
    from simtbx.gpu import gpu_energy_channels
    channels = gpu_energy_channels(deviceId=0)

    sim = nanoBragg(self.DETECTOR, self.BEAM, panel_id=0)
    sim.device_Id = 0

    n_channels = len(self.flux)

    assert channels.get_deviceID() == sim.device_Id
    assert channels.get_nchannels() == 0  # uninitialized
    for channel in range(n_channels):
        amplitudes = self.sfall_channels[channel]
        channels.structure_factors_to_GPU_direct_cuda(
            channel, amplitudes.indices(), amplitudes.data())
    assert channels.get_nchannels() == n_channels

    sim.Ncells_abc = (20, 20, 20)
    sim.Amatrix = sqr(self.CRYSTAL.get_A()).transpose()
    sim.oversample = 2
    if argchk:
        print("\npolychromatic GPU argchk")
        sim.xtal_shape = shapetype.Gauss_argchk
    else:
        print("\npolychromatic GPU no argchk")
        sim.xtal_shape = shapetype.Gauss
    sim.interpolate = 0

    # allocate GPU arrays
    from simtbx.gpu import exascale_api
    simulation = exascale_api(nanoBragg=sim)
    simulation.allocate_cuda()

    from simtbx.gpu import gpu_detector as gpud
    detector = gpud(deviceId=sim.device_Id,
                    detector=self.DETECTOR,
                    beam=self.BEAM)
    detector.each_image_allocate_cuda()

    # loop over energies
    for channel in range(n_channels):
        sim.flux = self.flux[channel]
        sim.wavelength_A = self.wavlen[channel]
        print(
            "USE_EXASCALE_API+++++++++++++ Wavelength %d=%.6f, Flux %.6e, Fluence %.6e"
            % (channel, sim.wavelength_A, sim.flux, sim.fluence))
        simulation.add_energy_channel_from_gpu_amplitudes_cuda(
            channel, channels, detector)

    unit_scale = 1.0
    detector.scale_in_place_cuda(unit_scale)  # apply scale directly on GPU
    # return to canonical energy for subsequent background
    sim.wavelength_A = self.BEAM.get_wavelength()

    def set_water_background():
        """Apply the water-background settings shared by both paths."""
        sim.Fbg_vs_stol = water
        sim.amorphous_sample_thick_mm = 0.02
        sim.amorphous_density_gcm3 = 1
        sim.amorphous_molecular_weight_Da = 18
        sim.flux = 1e12
        sim.beamsize_mm = 0.003  # square (not user specified)
        sim.exposure_s = 1.0  # multiplies flux x exposure

    if cuda_background:
        set_water_background()
        simulation.add_background_cuda(detector)

        # deallocate GPU arrays afterward
        detector.write_raw_pixels_cuda(sim)  # updates SIM.raw_pixels from GPU
        detector.each_image_free_cuda()
    else:
        # deallocate GPU arrays up front
        detector.write_raw_pixels_cuda(sim)  # updates SIM.raw_pixels from GPU
        detector.each_image_free_cuda()

        set_water_background()
        sim.progress_meter = False
        sim.add_background()
    return sim
def run_batch_job(test_without_mpi=False):
  """Drive one batch job: broadcast shared inputs, then process events per rank.

  Sequence of operations:
    1. Read ``params`` from ``parse_input()`` and, if requested, start a
       ``cProfile`` profiler.
    2. Export ``params.nxmx_local_data`` as ``NXMX_LOCAL_DATA``.
    3. Use the ``mpiEmulator`` stand-in when either ``test_without_mpi`` or
       ``params.test_without_mpi`` is true, otherwise real MPI.
    4. Set the OpenMP thread count from ``OMP_NUM_THREADS`` (default 1).
    5. Rank 0 reads the merged amplitudes from ``cyto_init_merge.mtz`` and
       broadcasts a dict with keys ``spectra`` (an empty dict) and
       ``amplitudes`` to all ranks.
    6. Optionally redirect stdout/stderr to per-rank files under
       ``params.log.outdir``.
    7. Try to create the ``gpu_energy_channels`` object on device
       ``rank % params.devices_per_node``; fall back to ``None`` when
       ``simtbx.gpu`` cannot be imported.
    8. Call ``tst_one`` once for every event index assigned to this rank
       (``rank, rank+size, ...`` below ``params.N_total``), in random order.
    9. Print timing summaries and, if profiling, dump ``cpu_<rank>.prof``.

  NOTE(review): ``parse_input``, ``os``, ``time``, ``omp_get_num_procs``,
  ``start_elapse`` and ``tst_one`` are not defined in this function and are
  presumably provided at module level -- confirm.

  :param test_without_mpi: force the MPI emulator regardless of ``params``.
  """
  params,options = parse_input()
  if params.log.by_rank:
    # only needed for the stdout/stderr redirection further down
    import io, sys
  if params.log.rank_profile:
    import cProfile
    pr = cProfile.Profile()
    pr.enable()
  # workaround for getting master nexus
  os.environ["NXMX_LOCAL_DATA"] = params.nxmx_local_data
  if test_without_mpi or params.test_without_mpi:
    from LS49.adse13_196.mock_mpi import mpiEmulator
    MPI = mpiEmulator()
  else:
    from libtbx.mpi4py import MPI
  comm = MPI.COMM_WORLD
  rank = comm.Get_rank()
  size = comm.Get_size()
  import omptbx
  workaround_nt = int(os.environ.get("OMP_NUM_THREADS",1))
  omptbx.omp_set_num_threads(workaround_nt)
  N_stride = size # total number of worker tasks
  print("hello from rank %d of %d"%(rank,size),"with omp_threads=",omp_get_num_procs())
  import datetime
  start_comp = time() # reference point for the "after Python imports" timing
  print(rank, time(), "finished with the calculation of channels, now construct single broadcast")
  if rank == 0:
    # only rank 0 touches the file system for the shared inputs
    print("Rank 0 time", datetime.datetime.now())
    spectrum_dict = {}
    from iotbx.reflection_file_reader import any_reflection_file
    from LS49 import ls49_big_data
    merge_file = os.path.join(ls49_big_data,"adse13_228","cyto_init_merge.mtz")
    Fmerge = any_reflection_file(merge_file).as_miller_arrays()[0].as_amplitude_array()
    print("Fmerge min/max = %f / %f" % (min(Fmerge.data()), max(Fmerge.data())))
    transmitted_info = dict(spectra = spectrum_dict, amplitudes = Fmerge, )
  else:
    transmitted_info = None
  # every rank receives rank 0's dict
  transmitted_info = comm.bcast(transmitted_info, root = 0)
  comm.barrier()
  # event indices for this rank: rank, rank+size, rank+2*size, ...
  parcels = list(range(rank,params.N_total,N_stride))
  print(rank, time(), "finished with single broadcast, now set up the rank logger")
  if params.log.by_rank:
    expand_dir = os.path.expandvars(params.log.outdir)
    os.makedirs(expand_dir, exist_ok=True)
    log_path = os.path.join(expand_dir,"rank_%d.log"%rank)
    error_path = os.path.join(expand_dir,"rank_%d.err"%rank)
    #print("Rank %d redirecting stdout/stderr to"%rank, log_path, error_path)
    # unbuffered binary files wrapped as write-through text streams, opened in
    # append mode; they are not closed inside this function
    sys.stdout = io.TextIOWrapper(open(log_path,'ab', 0), write_through=True)
    sys.stderr = io.TextIOWrapper(open(error_path,'ab', 0), write_through=True)
  print(rank, time(), "finished with the rank logger, now construct the GPU cache container")
  try:
    from simtbx.gpu import gpu_energy_channels
    gpu_channels_singleton = gpu_energy_channels (
      deviceId = rank % params.devices_per_node )
    # singleton will instantiate, regardless of cuda, device count, or exascale API
  except ImportError:
    gpu_channels_singleton = None
  comm.barrier()
  import random
  # work through this rank's events in random order until none remain
  while len(parcels)>0:
    idx = random.choice(parcels)
    cache_time = time()
    print("idx------start-------->",idx,"rank",rank,time())
    # if rank==0: os.system("nvidia-smi")
    tst_one(i_exp=idx,spectra=transmitted_info["spectra"],
            Fmerge=transmitted_info["amplitudes"],
            gpu_channels_singleton=gpu_channels_singleton,
            rank=rank,params=params
            )
    parcels.remove(idx)
    print("idx------finis-------->",idx,"rank",rank,time(),"elapsed",time()-cache_time)
  comm.barrier()
  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after srun startup %.3f"%(time()-start_elapse))
  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after Python imports %.3f"%(time()-start_comp))
  if params.log.rank_profile:
    pr.disable()
    pr.dump_stats("cpu_%d.prof"%rank)
def model_spots_from_pandas(pandas_frame,
                            rois_per_panel=None,
                            mtz_file=None,
                            mtz_col=None,
                            oversample_override=None,
                            Ncells_abc_override=None,
                            pink_stride_override=None,
                            spectrum_override=None,
                            cuda=False,
                            device_Id=0,
                            time_panels=False,
                            d_max=999,
                            d_min=1.5,
                            defaultF=1e3,
                            omp=False,
                            norm_by_spectrum=False,
                            symbol_override=None,
                            quiet=False,
                            reset_Bmatrix=False,
                            nopolar=False,
                            force_no_detector_thickness=False,
                            printout_pix=None,
                            norm_by_nsource=False,
                            use_exascale_api=False,
                            use_db=False):
    """Forward-model spots for the single experiment described by a dataframe.

    The experiment is loaded from ``pandas_frame.opt_exp_name`` and simulated
    with one of three back ends, chosen in this order: ``multipanel_sim``
    (``use_exascale_api``), ``diffBragg_forward`` (``use_db``), or
    ``flexBeam_sim_colors`` (default).

    :param pandas_frame: dataframe with exactly one row. Columns read:
        ``opt_exp_name``, ``ncells``, ``spot_scales``, ``beamsize_mm``,
        ``total_flux``, ``oversample``, ``lam0``, ``lam1``; optionally
        ``detz_shift_mm``, ``spectrum_filename``/``spectrum_stride``,
        ``use_diffuse_models``/``diffuse_gamma``/``diffuse_sigma``/
        ``gamma_miller_units``, and ``a, b, c, al, be, ga`` when
        ``reset_Bmatrix`` is set.
    :param rois_per_panel: optional mapping; its keys are passed to
        ``flexBeam_sim_colors`` as ``pids`` and the mapping itself as
        ``rois_perpanel`` (default back end only).
    :param mtz_file: optional MTZ path for the amplitudes; requires ``mtz_col``.
    :param mtz_col: MTZ column passed to ``utils.open_mtz``.
    :param oversample_override: replaces the dataframe ``oversample`` value.
    :param Ncells_abc_override: replaces the dataframe ``ncells`` value.
    :param pink_stride_override: replaces the dataframe ``spectrum_stride``.
    :param spectrum_override: iterable of ``(wavelength, flux)`` pairs used
        instead of any spectrum file.
    :param cuda: forwarded to ``diffBragg_forward`` / ``flexBeam_sim_colors``;
        the exascale path always passes ``cuda=True``.
    :param device_Id: forwarded to ``diffBragg_forward`` /
        ``flexBeam_sim_colors``; the exascale path always uses device 0.
    :param time_panels: forwarded (and-ed with ``not quiet``) to
        ``flexBeam_sim_colors``.
    :param d_max: passed as ``dmax`` when amplitudes are generated from the
        crystal (no ``mtz_file``).
    :param d_min: passed as ``dmin`` in the same case.
    :param defaultF: passed as ``defaultF`` in the same case.
    :param omp: forwarded to ``flexBeam_sim_colors``.
    :param norm_by_spectrum: divide the spot scale by the number of entries in
        the spectrum file.
    :param symbol_override: passed as ``symbol`` when amplitudes are generated
        from the crystal.
    :param quiet: suppress log messages and ``show_params`` output.
    :param reset_Bmatrix: rebuild the crystal B matrix from the unit-cell
        columns of the dataframe.
    :param nopolar: forwarded to ``diffBragg_forward`` /
        ``flexBeam_sim_colors``.
    :param force_no_detector_thickness: strip thickness from the detector
        model before simulating.
    :param printout_pix: forwarded to ``diffBragg_forward`` /
        ``flexBeam_sim_colors``.
    :param norm_by_nsource: divide each image by ``len(energies)`` (default
        back end only).
    :param use_exascale_api: use ``multipanel_sim`` with GPU-resident
        amplitudes.
    :param use_db: use ``diffBragg_forward``; required for diffuse models.
    :returns: ``(results, expt)``. For the default back end ``results`` is a
        numpy array of the per-panel images; for the other back ends it is
        whatever the back end returned.
    :raises RuntimeError: if the dataframe requests diffuse models while
        ``use_db`` is false.
    """
    if use_exascale_api:
        assert gpu_energy_channels is not None, "cant use exascale api if not in a GPU build"
        assert multipanel_sim is not None, "cant use exascale api if LS49: https://github.com/nksauter/LS49.git is not configured\n install in the modules folder"

    df = pandas_frame

    if not quiet:
        LOGGER.info("Loading experiment models")
    expt_name = df.opt_exp_name.values[0]
    El = ExperimentListFactory.from_json_file(expt_name, check_format=False)
    expt = El[0]
    columns = list(df)
    if "detz_shift_mm" in columns:  # NOTE, this could also be inside expt_name directly
        expt.detector = utils.shift_panelZ(expt.detector,
                                           df.detz_shift_mm.values[0])

    if force_no_detector_thickness:
        expt.detector = utils.strip_thickness_from_detector(expt.detector)
    if reset_Bmatrix:
        ucell_params = df[["a", "b", "c", "al", "be", "ga"]].values[0]
        ucell_man = utils.manager_from_params(ucell_params)
        expt.crystal.set_B(ucell_man.B_recipspace)
    assert len(df) == 1
    Ncells_abc = df.ncells.values[0]
    if Ncells_abc_override is not None:
        Ncells_abc = Ncells_abc_override
    spot_scale = df.spot_scales.values[0]
    beamsize_mm = df.beamsize_mm.values[0]
    total_flux = df.total_flux.values[0]
    oversample = df.oversample.values[0]
    if oversample_override is not None:
        oversample = oversample_override

    # get the optimized spectra
    if spectrum_override is None:
        if "spectrum_filename" in columns and df.spectrum_filename.values[0] is not None:
            spectrum_file = df.spectrum_filename.values[0]
            pink_stride = df.spectrum_stride.values[0]
            if norm_by_spectrum:
                nspec = utils.load_spectra_file(spectrum_file)[0].shape[0]
                spot_scale = spot_scale / nspec
            if pink_stride_override is not None:
                pink_stride = pink_stride_override
            fluxes, energies = utils.load_spectra_file(spectrum_file,
                                                       total_flux=total_flux,
                                                       pinkstride=pink_stride)
        else:
            # no spectrum available: single energy at the beam wavelength
            fluxes = np.array([total_flux])
            energies = np.array(
                [utils.ENERGY_CONV / expt.beam.get_wavelength()])
            if not quiet:
                LOGGER.info("Running MONO sim")
    else:
        wavelens, fluxes = map(np.array, zip(*spectrum_override))
        energies = utils.ENERGY_CONV / wavelens

    # Linear wavelength correction lam0 + lam1*wavelength; a stored value of -1
    # is replaced by the identity coefficient (0 for lam0, 1 for lam1).
    lam0 = df.lam0.values[0]
    lam1 = df.lam1.values[0]
    if lam0 == -1:
        lam0 = 0
    if lam1 == -1:
        lam1 = 1
    wavelens = utils.ENERGY_CONV / energies
    wavelens = lam0 + lam1 * wavelens
    energies = utils.ENERGY_CONV / wavelens

    if mtz_file is not None:
        assert mtz_col is not None
        Famp = utils.open_mtz(mtz_file, mtz_col)
    else:
        Famp = utils.make_miller_array_from_crystal(expt.crystal,
                                                    dmin=d_min,
                                                    dmax=d_max,
                                                    defaultF=defaultF,
                                                    symbol=symbol_override)

    diffuse_params = None
    if "use_diffuse_models" in columns and df.use_diffuse_models.values[0]:
        if not use_db:
            raise RuntimeError(
                "Cant simulate diffuse models unless use_db=True (diffBragg modeler)"
            )
        diffuse_params = {
            "gamma": tuple(df.diffuse_gamma.values[0]),
            "sigma": tuple(df.diffuse_sigma.values[0]),
            "gamma_miller_units": False
        }
        if "gamma_miller_units" in columns:
            diffuse_params["gamma_miller_units"] = df.gamma_miller_units.values[0]

    if use_exascale_api:
        #===================
        gpu_channels_singleton = gpu_energy_channels(deviceId=0)
        print(gpu_channels_singleton.get_deviceID(), "device")
        from simtbx.nanoBragg import nanoBragg_crystal
        C = nanoBragg_crystal.NBcrystal(init_defaults=False)
        C.miller_array = Famp
        # NOTE(review): this value is overwritten on the next line; kept
        # because NBcrystal is not visible from here.
        F_P1 = C.miller_array
        F_P1 = Famp.expand_to_p1()
        gpu_channels_singleton.structure_factors_to_GPU_direct(
            0, F_P1.indices(), F_P1.data())
        # multipanel_sim receives the GPU channel object in place of amplitudes
        Famp = gpu_channels_singleton
        #===========
        results, _, _ = multipanel_sim(CRYSTAL=expt.crystal,
                                       DETECTOR=expt.detector,
                                       BEAM=expt.beam,
                                       Famp=Famp,
                                       energies=energies,
                                       fluxes=fluxes,
                                       Ncells_abc=Ncells_abc,
                                       beamsize_mm=beamsize_mm,
                                       oversample=oversample,
                                       spot_scale_override=spot_scale,
                                       default_F=0,
                                       interpolate=0,
                                       include_background=False,
                                       profile="gauss",
                                       cuda=True,
                                       show_params=False)
        return results, expt

    elif use_db:
        results = diffBragg_forward(CRYSTAL=expt.crystal,
                                    DETECTOR=expt.detector,
                                    BEAM=expt.beam,
                                    Famp=Famp,
                                    fluxes=fluxes,
                                    energies=energies,
                                    beamsize_mm=beamsize_mm,
                                    Ncells_abc=Ncells_abc,
                                    spot_scale_override=spot_scale,
                                    device_Id=device_Id,
                                    oversample=oversample,
                                    show_params=not quiet,
                                    nopolar=nopolar,
                                    printout_pix=printout_pix,
                                    diffuse_params=diffuse_params,
                                    cuda=cuda)
        return results, expt

    else:
        pids = None
        if rois_per_panel is not None:
            # Must be a flat list of panel ids: a trailing comma after this
            # expression would wrap the list in a 1-tuple.
            pids = list(rois_per_panel.keys())
        results = flexBeam_sim_colors(CRYSTAL=expt.crystal,
                                      DETECTOR=expt.detector,
                                      BEAM=expt.beam,
                                      Famp=Famp,
                                      fluxes=fluxes,
                                      energies=energies,
                                      beamsize_mm=beamsize_mm,
                                      Ncells_abc=Ncells_abc,
                                      spot_scale_override=spot_scale,
                                      cuda=cuda,
                                      device_Id=device_Id,
                                      oversample=oversample,
                                      time_panels=time_panels and not quiet,
                                      pids=pids,
                                      rois_perpanel=rois_per_panel,
                                      omp=omp,
                                      show_params=not quiet,
                                      nopolar=nopolar,
                                      printout_pix=printout_pix)
        if norm_by_nsource:
            return np.array([image / len(energies) for _, image in results]), expt
        else:
            return np.array([image for _, image in results]), expt