def test_non_overlap(self):
    self.cm.add_task('T1', len(numa.getInfo()['0']['cores']) // 2)
    self.cm.add_task('T2', len(numa.getInfo()['0']['cores']) // 2)
    T1 = self.cm.get_cores('T1')
    T2 = self.cm.get_cores('T2')
    self.assertEqual(len(T1), len(set(T1)),
                     "Cores associated to task multiple times: {}".format(T1))
    self.assertEqual(len(T2), len(set(T2)),
                     "Cores associated to task multiple times: {}".format(T2))
    self.assertEqual(len(T2 + T1), len(set(T2 + T1)),
                     "Cores not unique: T1={}, T2={}".format(T1, T2))
def __finalize_cores(self):
    self.__new_task = False
    numa_info = numa.getInfo()[self.__numa_node]
    cores = copy.deepcopy(numa_info['cores'])
    iso_cores = copy.deepcopy(numa_info['isolated_cores'])
    # ToDo: Add advanced checks so that tasks requiring isolated cores get them first etc.
    for taskname, taskprops in self.__tasks.items():
        if taskprops['require_isolated']:
            log.debug("Task {}: Requested cores {} - available: {}".format(taskname, taskprops['requested_cores'], len(iso_cores)))
            if len(iso_cores) < taskprops['requested_cores']:
                raise RuntimeError("Task {} requires {} isolated cores, but only {} are available.".format(taskname, taskprops['requested_cores'], len(iso_cores)))
            taskprops['reserved_cores'] = iso_cores[:taskprops['requested_cores']]
            iso_cores = iso_cores[taskprops['requested_cores']:]
        else:
            if taskprops['prefere_isolated']:
                # Prefer isolated cores, but fall back to the regular pool if
                # not enough of them are left
                if len(iso_cores) < taskprops['requested_cores']:
                    cores = iso_cores + cores
                    iso_cores = []
                else:
                    cores = iso_cores[:taskprops['requested_cores']] + cores
                    iso_cores = iso_cores[taskprops['requested_cores']:]
            if len(cores) < taskprops['requested_cores']:
                cores = cores + iso_cores
                iso_cores = []
            if len(cores) < taskprops['requested_cores']:
                log.warning("Requested reservation of {} cores for task {}, but only {} available on node!".format(taskprops['requested_cores'], taskname, len(cores)))
                taskprops['reserved_cores'] = numa_info['cores']
            else:
                taskprops['reserved_cores'] = cores[:taskprops['requested_cores']]
                cores = cores[taskprops['requested_cores']:]
    log.debug("Remaining unassigned cores: %s", ", ".join(cores))
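# Usage sketch of the allocation policy implemented above (hedged: assumes a
# CoreManager bound to numa node '0' as in the tests; the concrete core ids
# depend on the host's NUMA layout and are illustrative only):
#
#   cm = CoreManager('0')
#   cm.add_task('mkrecv', 3, require_isolated=True)   # must get isolated cores
#   cm.add_task('mksend', 2, prefere_isolated=True)   # isolated if available
#   cm.add_task('gated_spectrometer', 1)              # any core will do
#
#   cm.get_cores('mkrecv')     # e.g. ['3', '4', '5'], drawn from isolated_cores
#   cm.get_coresstr('mkrecv')  # same cores as comma-separated string for taskset -c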
def capture_start(self, config_json=""): """ @brief start streaming output """ log.info("Starting EDD backend") try: for i, streamid in enumerate(self._config['input_data_streams']): stream_description = self._config['input_data_streams'][streamid] mkrecvheader_file = tempfile.NamedTemporaryFile(delete=False) log.debug("Creating mkrec header file: {}".format(mkrecvheader_file.name)) mkrecvheader_file.write(_mkrecv_header) # DADA may need this # ToDo: Check for input stream definitions mkrecvheader_file.write("NBIT {}\n".format(stream_description["bit_depth"])) mkrecvheader_file.write("HEAP_SIZE {}\n".format(self.input_heapSize)) mkrecvheader_file.write("\n#OTHER PARAMETERS\n") mkrecvheader_file.write("samples_per_block {}\n".format(self._config["samples_per_block"])) mkrecvheader_file.write("\n#PARAMETERS ADDED AUTOMATICALLY BY MKRECV\n") mkrecvheader_file.close() cfg = self._config.copy() cfg.update(stream_description) if not self._config['dummy_input']: numa_node = self.__numa_node_pool[i] fastest_nic, nic_params = numa.getFastestNic(numa_node) log.info("Receiving data for {} on NIC {} [ {} ] @ {} Mbit/s".format(streamid, fastest_nic, nic_params['ip'], nic_params['speed'])) physcpu = ",".join(numa.getInfo()[numa_node]['cores'][2:7]) if self._config['idx1_modulo'] == 'auto': idx1modulo = 10*cfg["samples_per_block"] / stream_description["samples_per_heap"] else: idx1modulo = self._config['idx1_modulo'] cmd = "taskset -c {physcpu} mkrecv_v4 --quiet --header {mkrecv_header} --idx1-step {samples_per_heap} --heap-size {input_heap_size} --idx1-modulo {idx1modulo} \ --dada-key {dada_key} --sync-epoch {sync_time} --sample-clock {sample_rate} \ --ibv-if {ibv_if} --port {port} {ip}".format(mkrecv_header=mkrecvheader_file.name, physcpu=physcpu,ibv_if=nic_params['ip'], input_heap_size=self.input_heapSize, idx1modulo=idx1modulo, **cfg ) mk = ManagedProcess(cmd, stdout_handler=self._polarization_sensors[streamid]["mkrecv_sensors"].stdout_handler) else: log.warning("Creating Dummy input instead of listening to network!") cmd = "dada_junkdb -c 1 -R 1000 -t 3600 -k {dada_key} {mkrecv_header}".format(mkrecv_header=mkrecvheader_file.name, **cfg ) mk = ManagedProcess(cmd) self.mkrec_cmd.append(mk) self._subprocessMonitor.add(mk, self._subprocess_error) except Exception as E: log.error("Error starting pipeline: {}".format(E)) raise E else: self.__watchdogs = [] for i, k in enumerate(self._config['input_data_streams']): wd = SensorWatchdog(self._polarization_sensors[streamid]["input-buffer-total-write"], 10 * self._integration_time_status.value(), self.watchdog_error) wd.start() self.__watchdogs.append(wd)
def __init__(self, ip, port):
    """@brief initialize the pipeline."""
    EDDPipeline.__init__(self, ip, port, DEFAULT_CONFIG)
    self.mkrec_cmd = []
    self._dada_buffers = ["dada", "dadc"]
    self._dada_buffers_monitor = []
    self._data_processing_proc = None
    self._mkrecv_ingest_proc = None
    self._archive_directory_monitor = None
    # Pick first available numa node. Disable non-available nodes via
    # EDD_ALLOWED_NUMA_NODES environment variable
    self.numa_number = numa.getInfo().keys()[0]
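# Hedged sketch of the node filtering the comment above alludes to. The
# filtering presumably happens inside numa.getInfo() itself; an equivalent
# standalone version, assuming EDD_ALLOWED_NUMA_NODES holds a comma-separated
# list of node ids (e.g. "0,1"), could look like this:
#
#   import os
#   allowed = os.environ.get('EDD_ALLOWED_NUMA_NODES')
#   nodes = numa.getInfo().keys()
#   if allowed is not None:
#       nodes = [n for n in nodes if n in allowed.split(',')]
#   numa_number = nodes[0]   # first remaining node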
def capture_start(self):
    """
    Start the FITS interface capture thread.
    """
    log.debug("Starting FITS interface capture")
    nic_name, nic_description = numa.getFastestNic()
    self._capture_interface = nic_description['ip']
    log.debug("Capturing on interface {}, ip: {}, speed: {} Mbit/s".format(
        nic_name, nic_description['ip'], nic_description['speed']))
    affinity = numa.getInfo()[nic_description['node']]['cores']

    handler = GatedSpectrometerSpeadHandler(self._fw_connection_manager,
                                            len(self._config['input_data_streams']),
                                            drop_invalid_packages=self._config['drop_nans'])
    self._capture_thread = SpeadCapture(self.mc_interface, self.mc_port,
                                        self._capture_interface, handler, affinity)
    self._capture_thread.start()
def capture_start(self):
    """
    Start one capture thread per multicast subscription.
    """
    _log.info("Starting capture")
    nic_name, nic_description = numa.getFastestNic()
    self._capture_interface = nic_description['ip']
    _log.info("Capturing on interface {}, ip: {}, speed: {} Mbit/s".format(
        nic_name, nic_description['ip'], nic_description['speed']))
    affinity = numa.getInfo()[nic_description['node']]['cores']

    self._capture_threads = []
    for hdf5_group_prefix, mcg in self.mc_subscriptions.items():
        spead_handler = GatedSpectrometerSpeadHandler(hdf5_group_prefix, mcg['attributes'])
        ct = SpeadCapture(mcg["groups"], mcg["port"], self._capture_interface,
                          spead_handler, self._package_writer, affinity)
        ct.start()
        self._capture_threads.append(ct)
    _log.debug("Capture start complete!")
def capture_start(self, config_json=""): """@brief start the dspsr instance then turn on dada_junkdb instance.""" log.info("Starting EDD backend") if self.state != "ready": raise FailReply( "pipleine state is not in state = ready, but in state = {} - cannot start the pipeline" .format(self.state)) #return self.state = "starting" try: mkrecvheader_file = tempfile.NamedTemporaryFile(delete=False) log.debug("Creating mkrec header file: {}".format( mkrecvheader_file.name)) mkrecvheader_file.write(mkrecv_header) # DADA may need this mkrecvheader_file.write("NBIT {}\n".format( self._config["input_bit_depth"])) mkrecvheader_file.write("HEAP_SIZE {}\n".format( self.input_heapSize)) mkrecvheader_file.write("\n#OTHER PARAMETERS\n") mkrecvheader_file.write("samples_per_block {}\n".format( self._config["samples_per_block"])) mkrecvheader_file.write( "\n#PARAMETERS ADDED AUTOMATICALLY BY MKRECV\n") mkrecvheader_file.close() for i, k in enumerate(self._config['enabled_polarizations']): cfg = self._config.copy() cfg.update(self._config[k]) if not self._config['dummy_input']: numa_node = self._config[k]['numa_node'] physcpu = ",".join(numa.getInfo()[numa_node]['cores'][4:9]) cmd = "taskset {physcpu} mkrecv_nt --quiet --header {mkrecv_header} --idx1-step {samples_per_heap} --dada-key {dada_key} \ --sync-epoch {sync_time} --sample-clock {sample_clock} \ --ibv-if {ibv_if} --port {port_rx} {mcast_sources}".format( mkrecv_header=mkrecvheader_file.name, physcpu=physcpu, **cfg) mk = ManagedProcess( cmd, stdout_handler=self._polarization_sensors[k] ["mkrecv_sensors"].stdout_handler) else: log.warning( "Creating Dummy input instead of listening to network!" ) cmd = "dummy_data_generator -o {dada_key} -b {input_bit_depth} -d 1000 -s 0".format( **cfg) mk = ManagedProcess(cmd) self.mkrec_cmd.append(mk) self._subprocessMonitor.add(mk, self._subprocess_error) except Exception as e: log.error("Error starting pipeline: {}".format(e)) self.state = "error" else: self.state = "running" self.__watchdogs = [] for i, k in enumerate(self._config['enabled_polarizations']): wd = SensorWatchdog( self._polarization_sensors[k]["input-buffer-total-write"], 20, self.watchdog_error) wd.start() self.__watchdogs.append(wd)
def configure(self, config_json):
    """
    @brief Configure the EDD critical PFB: destroy any existing ring buffer and create new ones.

    @param config_json A JSON dictionary object containing configuration information

    @detail The configuration dictionary is highly flexible. An example is below:
    """
    log.info("Configuring EDD backend for processing")
    log.debug("Configuration string: '{}'".format(config_json))
    if self.state != "idle":
        raise FailReply('Cannot configure pipeline. Pipeline state {}.'.format(self.state))
        # alternatively we should automatically deconfigure
        #yield self.deconfigure()
    self.state = "configuring"

    # Merge retrieved config into default via recursive dict merge
    def __updateConfig(oldo, new):
        old = oldo.copy()
        for k in new:
            if isinstance(old[k], dict):
                old[k] = __updateConfig(old[k], new[k])
            else:
                old[k] = new[k]
        return old

    if isinstance(config_json, str):
        cfg = json.loads(config_json)
    elif isinstance(config_json, dict):
        cfg = config_json
    else:
        self.state = "idle"  # no state changed
        raise FailReply("Cannot handle config type {}. Config has to be either a JSON formatted string or a dict!".format(type(config_json)))
    try:
        self._config = __updateConfig(DEFAULT_CONFIG, cfg)
    except KeyError as error:
        self.state = "idle"  # no state changed
        raise FailReply("Unknown configuration option: {}".format(str(error)))

    cfs = json.dumps(self._config, indent=4)
    log.info("Received configuration:\n" + cfs)
    self._edd_config_sensor.set_value(cfs)

    # calculate input buffer parameters
    self.input_heapSize = self._config["samples_per_heap"] * self._config['input_bit_depth'] / 8
    nHeaps = self._config["samples_per_block"] / self._config["samples_per_heap"]
    input_bufferSize = nHeaps * self.input_heapSize
    log.info('Input dada parameters created from configuration:\n'
             '  heap size:       {} byte\n'
             '  heaps per block: {}\n'
             '  buffer size:     {} byte'.format(self.input_heapSize, nHeaps, input_bufferSize))

    # calculate output buffer parameters
    nSlices = max(self._config["samples_per_block"] / self._config['fft_length'], 1)
    nChannels = self._config['fft_length'] / 2
    # on / off spectrum + one side channel item per spectrum
    output_bufferSize = nSlices * 2 * nChannels * self._config['output_bit_depth'] / 8
    output_heapSize = output_bufferSize
    # in spead documentation BYTE per second and not bit!
    rate = output_bufferSize * float(self._config['sample_clock']) / self._config["samples_per_block"]
    rate *= self._config["output_rate_factor"]  # set rate to (100+X)% of expected rate
    self._output_rate_status.set_value(rate / 1E9)

    log.info('Output parameters calculated from configuration:\n'
             '  spectra per block: {}\n'
             '  nChannels:         {}\n'
             '  buffer size:       {} byte\n'
             '  heap size:         {} byte\n'
             '  rate ({:.0f}%):       {} Gbps'.format(
                 nSlices, nChannels, output_bufferSize, output_heapSize,
                 self._config["output_rate_factor"] * 100, rate / 1E9))
    self._subprocessMonitor = SubprocessMonitor()

    for i, k in enumerate(self._config['enabled_polarizations']):
        numa_node = self._config[k]['numa_node']

        # configure dada buffer
        bufferName = self._config[k]['dada_key']
        yield self._create_ring_buffer(input_bufferSize, 64, bufferName, numa_node)

        ofname = bufferName[::-1]
        # we write nSlice blocks on each go
        yield self._create_ring_buffer(output_bufferSize, 64, ofname, numa_node)

        # Configure + launch
        # ToDo: smarter system to pass the options from the controller to the
        # program without redundant typing of options
        physcpu = numa.getInfo()[numa_node]['cores'][0]
        cmd = "taskset -c {physcpu} pfb --input_key={dada_key} --inputbitdepth={input_bit_depth} --fft_length={fft_length} --ntaps={ntaps} -o {ofname} --log_level={log_level} --outputbitdepth={output_bit_depth} --output_type=dada".format(
            dada_key=bufferName, ofname=ofname, heapSize=self.input_heapSize,
            numa_node=numa_node, physcpu=physcpu, **self._config)
        log.debug("Command to run: {}".format(cmd))

        cudaDevice = numa.getInfo()[self._config[k]["numa_node"]]["gpus"][0]
        cli = ManagedProcess(cmd, env={"CUDA_VISIBLE_DEVICES": cudaDevice})
        self._subprocessMonitor.add(cli, self._subprocess_error)
        self._subprocesses.append(cli)

        cfg = self._config.copy()
        cfg.update(self._config[k])
        if self._config["output_type"] == 'dada':
            mksend_header_file = tempfile.NamedTemporaryFile(delete=False)
            mksend_header_file.write(mksend_header)
            mksend_header_file.close()

            timestep = input_bufferSize * 8 / cfg['input_bit_depth']
            physcpu = ",".join(numa.getInfo()[numa_node]['cores'][1:4])
            cmd = "taskset -c {physcpu} mksend --header {mksend_header} --nthreads 3 --dada-key {ofname} --ibv-if {ibv_if} --port {port_tx} --sync-epoch {sync_time} --sample-clock {sample_clock} --item1-step {timestep} --item2-list {polarization} --item3-list {fft_length} --item4-list {ntaps} --item6-list {sample_clock} --item5-list {sync_time} --rate {rate} --heap-size {heap_size} {mcast_dest}".format(
                mksend_header=mksend_header_file.name, timestep=timestep,
                ofname=ofname, polarization=i, nChannels=nChannels,
                physcpu=physcpu, rate=rate, heap_size=output_heapSize, **cfg)
        elif self._config["output_type"] == 'disk':
            if not os.path.isdir("./{ofname}".format(ofname=ofname)):
                os.mkdir("./{ofname}".format(ofname=ofname))
            cmd = "dada_dbdisk -k {ofname} -D ./{ofname} -W".format(ofname=ofname, **cfg)
        else:
            log.warning("Selected null output. Not sending data!")
            cmd = "dada_dbnull -z -k {}".format(ofname)

        log.debug("Command to run: {}".format(cmd))
        mks = ManagedProcess(cmd)
        self._subprocessMonitor.add(mks, self._subprocess_error)
        self._subprocesses.append(mks)

    self._subprocessMonitor.start()
    self.state = "ready"
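# Illustration of the recursive dict merge used in configure() above (a
# minimal standalone sketch; the key names are made up for the example):
#
#   defaults = {'fft_length': 1024, 'output': {'type': 'dada', 'rate_factor': 1.1}}
#   update   = {'output': {'type': 'network'}}
#
#   merged = __updateConfig(defaults, update)
#   # -> {'fft_length': 1024, 'output': {'type': 'network', 'rate_factor': 1.1}}
#
#   # A key in the update that is absent from the defaults raises a KeyError,
#   # which configure() maps to FailReply("Unknown configuration option: ...").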
def measurement_start(self):
    """
    @brief Create output directory, create mkrecv header, calculate tempo2
           predictor file if needed, start data capture, processing, and monitoring
    """
    if self._config["active"] == 0:
        log.info("Pipeline is not active")
        raise StateChange("ready")

    self._timer = Time.now()
    self._subprocessMonitor = SubprocessMonitor()
    try:
        self._png_monitor_callback.stop()
    except Exception as E:
        log.error("Png monitor already stopped.")
        log.exception(E)
    try:
        self._folder_size_monitor_callback.stop()
    except Exception as E:
        log.error("File size monitor already stopped.")
        log.exception(E)

    self._source_name = self.__eddDataStore.getTelescopeDataItem("source-name")
    ra = self.__eddDataStore.getTelescopeDataItem("ra")
    decl = self.__eddDataStore.getTelescopeDataItem("dec")
    scannum = self.__eddDataStore.getTelescopeDataItem("scannum")
    subscannum = self.__eddDataStore.getTelescopeDataItem("subscannum")
    receiver_name = self.__eddDataStore.getTelescopeDataItem("receiver")
    project_id = self.__eddDataStore.getTelescopeDataItem("project")
    log.info("Retrieved data from telescope:\n    Source name: {}\n    RA = {}, decl = {}, receiver = {}".format(self._source_name, ra, decl, receiver_name))

    if self._config["mode"] == "Timing":
        self.tzpar_file = os.path.join(self.tzpar_dir, '{}.par'.format(self._source_name.split("_")[0][1:]))
        log.debug("Checking parfile {}".format(self.tzpar_file))
        self.pulsar_flag = is_accessible(self.tzpar_file)
        if (parse_tag(self._source_name) != "R") and (not self.pulsar_flag):
            log.warning("Source {} is neither pulsar nor calibrator. Will not react until next measurement start".format(self._source_name))
            raise StateChange("ready")

    log.debug("Setting blank image")
    self._fscrunch.set_value(BLANK_IMAGE)
    self._tscrunch.set_value(BLANK_IMAGE)
    self._profile.set_value(BLANK_IMAGE)

    log.debug("Writing mkrecv header")
    self.cuda_number = numa.getInfo()[self.numa_number]['gpus'][0]
    log.info(" - Running on cuda core: {}".format(self.cuda_number))
    header = self._config["dada_header_params"]
    central_freq = header["frequency_mhz"]
    self._central_freq.set_value(str(header["frequency_mhz"]))
    self._source_name_sensor.set_value(self._source_name)
    self._nchannels.set_value(self._config["nchannels"])
    self._nbins.set_value(self._config["nbins"])
    header["telescope"] = self._config["tempo2_telescope_name"]
    log.info(" - Tempo2 telescope name: {}".format(header['telescope']))

    c = SkyCoord("{} {}".format(ra, decl), unit=(u.deg, u.deg))
    header["ra"] = c.to_string("hmsdms").split(" ")[0].replace("h", ":").replace("m", ":").replace("s", "")
    header["dec"] = c.to_string("hmsdms").split(" ")[1].replace("d", ":").replace("m", ":").replace("s", "")
    header["key"] = self._dada_buffers[0]
    log.debug(" - Dada key: {}".format(header['key']))
    header["mc_source"] = ""
    for i in self._config['input_data_streams']:
        header["mc_source"] += i["ip"] + ","
    header["mc_source"] = header["mc_source"][:-1]
    log.info(" - mc source: {}".format(header['mc_source']))
    header["mc_streaming_port"] = self._config['input_data_streams'][0]["port"]
    log.info(" - mc streaming port: {}".format(header['mc_streaming_port']))
    header["interface"] = numa.getFastestNic(self.numa_number)[1]['ip']
    log.info(" - mc interface: {}".format(header['interface']))
    header["sync_time"] = self.sync_epoch
    log.info(" - sync time: {}".format(header['sync_time']))
    if header['sample_clock'] == "unset":
        # adjustment for the predecimation factor is done in the master controller
        header["sample_clock"] = float(self._config['input_data_streams'][0]["sample_rate"])
log.info(" - sample_clock: {}".format(header['sample_clock'])) header["source_name"] = self._source_name header["obs_id"] = "{0}_{1}".format(scannum, subscannum) header["filesize"] = int(float(self._config["dada"]["size"])) log.info(" - obs_id: {}".format(header['obs_id'])) header["receiver_name"] = receiver_name log.info(" - receiver_name: {}".format(header['receiver_name'])) tstr = Time.now().isot.replace(":", "-") tdate = tstr.split("T")[0] log.debug("Setting up the input and scrunch data directories") if self._config["mode"] == "Timing": try: self.in_path = os.path.join("/mnt/", project_id, tdate, self._source_name, str(central_freq), tstr, "raw_data") self.out_path = os.path.join("/mnt/", project_id, tdate, self._source_name, str(central_freq), tstr, "combined_data") log.debug("Creating directories") log.info("Data will be written to {}".format(self.in_path)) log.debug("out path {}".format(self.out_path)) if not os.path.isdir(self.in_path): os.makedirs(self.in_path) if not os.path.isdir(self.out_path): os.makedirs(self.out_path) os.chdir(self.in_path) log.debug("Change to workdir: {}".format(os.getcwd())) log.debug("Current working directory: {}".format(os.getcwd())) except Exception as error: raise EddPulsarPipelineError(str(error)) else: try: self.in_path = os.path.join("/mnt/", project_id, tdate, self._source_name, str(central_freq), tstr) log.debug("Creating directories") log.info("Data will be written to {}".format(self.in_path)) if not os.path.isdir(self.in_path): os.makedirs(self.in_path) os.chdir(self.in_path) log.debug("Change to workdir: {}".format(os.getcwd())) log.debug("Current working directory: {}".format(os.getcwd())) except Exception as error: raise EddPulsarPipelineError(str(error)) os.chdir("/tmp/") log.debug("Creating the predictor with tempo2") if self._config["mode"] == "Timing": if (parse_tag(self._source_name) != "R") & is_accessible( self.tzpar_file): cmd = 'numactl -m {} taskset -c {} tempo2 -f {} -pred'.format( self.numa_number, self.__coreManager.get_coresstr('dspsr'), self.tzpar_file).split() cmd.append("{} {} {} {} {} {} {} 3599.999999999".format( self._config["tempo2_telescope_name"], Time.now().mjd - 0.5, Time.now().mjd + 0.5, float(central_freq) - (float(header["bandwidth"]) / 2), float(central_freq) + (float(header["bandwidth"]) / 2), self._config["tempo2_ntimecoeff"], self._config["tempo2_nfreqcoeff"])) log.info("Command to run: {}".format(cmd)) yield command_watcher(cmd, allow_fail=True) attempts = 0 retries = 5 while True: if attempts >= retries: error = "Could not read t2pred.dat" log.warning( "{}. Will not react until next measurement start". 
                    raise StateChange("ready")
                else:
                    yield sleep(1)
                    if is_accessible('{}/t2pred.dat'.format(os.getcwd())):
                        log.debug('Found {}/t2pred.dat'.format(os.getcwd()))
                        break
                    else:
                        attempts += 1

    self.dada_header_file = tempfile.NamedTemporaryFile(
        mode="w", prefix="edd_dada_header_", suffix=".txt", dir="/tmp/", delete=False)
    log.debug("Writing dada header file to {0}".format(self.dada_header_file.name))
    header_string = render_dada_header(header)
    self.dada_header_file.write(header_string)

    self.dada_key_file = tempfile.NamedTemporaryFile(
        mode="w", prefix="dada_keyfile_", suffix=".key", dir="/tmp/", delete=False)
    log.debug("Writing dada key file to {0}".format(self.dada_key_file.name))
    key_string = make_dada_key_string(self._dada_buffers[1])
    self.dada_key_file.write(key_string)
    log.debug("Dada key file contains:\n{0}".format(key_string))
    self.dada_header_file.close()
    self.dada_key_file.close()

    attempts = 0
    retries = 5
    while True:
        if attempts >= retries:
            error = "Could not read dada_key_file"
            log.warning("{}. Will not react until next measurement start".format(error))
            raise StateChange("ready")
        else:
            yield sleep(1)
            if is_accessible('{}'.format(self.dada_key_file.name)):
                log.debug('Found {}'.format(self.dada_key_file.name))
                break
            else:
                attempts += 1

    # Setting DM value for filterbank recording
    self.par_dict = {}
    self.dm = self._config["cdd_dm"]
    try:
        with open(os.path.join(self.tzpar_dir,
                               '{}.par'.format(self._source_name.split("_")[0][1:]))) as fh:
            for line in fh:
                # tempo2 par lines come as "key value", "key value error"
                # or "key value lock error"
                if len(line.strip().split()) == 2:
                    key, value = line.strip().split()
                elif len(line.strip().split()) == 3:
                    key, value, error = line.strip().split()
                elif len(line.strip().split()) == 4:
                    key, value, lock, error = line.strip().split()
                self.par_dict[key] = value.strip()
    except IOError as error:
        log.error(error)
    try:
        self.dm = float(self.par_dict["DM"])
    except KeyError as error:
        log.info("Key {} not found, will use default value of {}".format(error, self.dm))
    if parse_tag(self._source_name) == "R":
        log.info("This is a calibrator scan, will set dm to zero")
        self.dm = 0
    self._dm_sensor.set_value(self.dm)
    self._par_dict_sensor.set_value(
        json.dumps(self.par_dict).strip("{").strip("}").replace(",", "\n"))

    os.chdir(self.in_path)
    log.debug("source_name = {}".format(self._source_name))
    if self._config["mode"] == "Timing":
        if (parse_tag(self._source_name) != "R") and self.pulsar_flag:
            cmd = "numactl -m {numa} dspsr {args} {integration_time} {nchan} {nbin} -fft-bench -x {fft_length} -cpu {cpus} -cuda {cuda_number} -P {predictor} -N {name} -E {parfile} {keyfile}".format(
                numa=self.numa_number,
                fft_length=self._config["fft_length"],
                args=self._config["dspsr_params"]["args"],
                integration_time="-L {}".format(self._config["intergration_time"]),
                nchan="-F {}:D".format(self._config["nchannels"]),
                nbin="-b {}".format(self._config["nbins"]),
                name=self._source_name.split("_")[0],
                predictor="/tmp/t2pred.dat",
                parfile=self.tzpar_file,
                cpus=self.__coreManager.get_coresstr('dspsr'),
                cuda_number=self.cuda_number,
                keyfile=self.dada_key_file.name)
        elif parse_tag(self._source_name) == "R":
            cmd = "numactl -m {numa} dspsr {args} {integration_time} -c {period} -D 0.0001 -fft-bench -x {fft_length} {nchan} -cpu {cpus} -N {name} -cuda {cuda_number} {keyfile}".format(
                numa=self.numa_number,
                args=self._config["dspsr_params"]["args"],
                integration_time="-L {}".format(self._config["intergration_time"]),
                period=self._config["cal_period"],
                fft_length=self._config["fft_length"],
                nchan="-F {}:D".format(self._config["nchannels"]),
                name=self._source_name,
                cpus=self.__coreManager.get_coresstr('dspsr'),
                cuda_number=self.cuda_number,
                keyfile=self.dada_key_file.name)
        else:
            error = "Source is unknown"
            raise EddPulsarPipelineError(error)

    if self._config["mode"] == "Searching":
        if self._config["file_length"] == "":
            file_length = ""
            filename = "-o {}_{}_{}.fits".format(self._source_name, self.dm, self._config["npol"])
        else:
            file_length = "-L {}".format(self._config["file_length"])
            filename = ""
        cmd = "numactl -m {numa} digifits {args} -b 8 -F {nchan}:D -D {DM} -t {tsamp} -nsblk {nsblk} {file_length} -p {npol} -f {decimation} -do_dedisp -x {fft_length} -cpu {cpus} -cuda {cuda_number} {filename} {keyfile}".format(
            numa=self.numa_number,
            npol=self._config["npol"],
            args=self._config["digifits_params"]["args"],
            DM=self.dm,
            nchan=self._config["filterbank_nchannels"],
            fft_length=self._config["fft_length"],
            decimation=self._config["decimation"],
            nsblk=self._config["nsblk"],
            tsamp=self._config["tsamp"],
            file_length=file_length,
            filename=filename,
            cpus=self.__coreManager.get_coresstr('dspsr'),
            cuda_number=self.cuda_number,
            keyfile=self.dada_key_file.name)

    if self._config["mode"] == "Baseband":
        cmd = "numactl -m {numa} taskset -c {cpus} dbdisk_multithread -n {thread} -k dadc".format(
            numa=self.numa_number,
            in_path=self.in_path,
            thread=self._config["dbdisk_writing_threads"],
            cpus=self.__coreManager.get_coresstr('dspsr'))

    if self._config["mode"] == "Leap_baseband":
        cmd = "numactl -m {numa} taskset -c {cpus} dbdiskleap -n 8".format(
            numa=self.numa_number,
            cpus=self.__coreManager.get_coresstr('dspsr'))

    log.debug("Running command: {0}".format(cmd))
    if self._config["mode"] == "Timing":
        log.info("Starting dspsr")
    if self._config["mode"] == "Searching":
        log.info("Starting digifits")
    if self._config["mode"] == "Baseband":
        log.info("Starting dbdisk_multithread")
    if self._config["mode"] == "Leap_baseband":
        log.info("Starting dbdiskleap")
    self._data_processing_proc = ManagedProcess(cmd)
    self._subprocessMonitor.add(self._data_processing_proc, self._subprocess_error)

    ####################################################
    # STARTING merging code                            #
    ####################################################
    if self._config["mode"] != "Leap_baseband":
        cmd = "numactl -m {numa} taskset -c {cpu} {merge_application} -p {npart} -n {nthreads} --log_level=info".format(
            numa=self.numa_number,
            cpu=self.__coreManager.get_coresstr('merge'),
            nthreads=self._config["merge_threads"],
            merge_application=self._config["merge_application"],
            npart=self._config["npart"])
        log.debug("Running command: {0}".format(cmd))
        log.info("Starting EDDPolnMerge")
        self._merge_proc = ManagedProcess(cmd)
        self._subprocessMonitor.add(self._merge_proc, self._subprocess_error)

    ####################################################
    # STARTING MKRECV                                  #
    ####################################################
    cmd = "numactl -m {numa} taskset -c {cpu} mkrecv_v4 --header {dada_header} --nthreads {threads} --lst --quiet".format(
        numa=self.numa_number,
        cpu=self.__coreManager.get_coresstr('mkrecv'),
        threads=self._config["nstreams"],
        dada_header=self.dada_header_file.name)
    log.debug("Running command: {0}".format(cmd))
    log.info("Starting MKRECV")
    self._mkrecv_ingest_proc = ManagedProcess(
        cmd,
        stdout_handler=self._polarization_sensors[""]["mkrecv_sensors"].stdout_handler)
    self._subprocessMonitor.add(self._mkrecv_ingest_proc, self._subprocess_error)

    ####################################################
    # STARTING ARCHIVE MONITOR                         #
    ####################################################
    if self._config["mode"] == "Timing":
log.info("Staring archive monitor") self._archive_observer = Observer() self._archive_observer.daemon = False log.info("Input directory: {}".format(self.in_path)) log.info("Output directory: {}".format(self.out_path)) log.info("Setting up ArchiveAdder handler") self._handler = ArchiveAdder(self.out_path, self._config["zaplist"]) self._archive_observer.schedule(self._handler, str(self.in_path), recursive=False) log.info("Starting directory monitor") self._archive_observer.start() self._png_monitor_callback = tornado.ioloop.PeriodicCallback( self._png_monitor, 5000) self._png_monitor_callback.start() else: self._folder_size_monitor_callback = tornado.ioloop.PeriodicCallback( self._folder_size_monitor, 5000) self._folder_size_monitor_callback.start() self._subprocessMonitor.start() self._timer = Time.now() - self._timer log.info("Took {} s to start".format(self._timer * 86400))
def configure(self, config_json):
    """
    Configure the EDD VLBI pipeline

    Args:
        config_json: A JSON dictionary object containing configuration information
    """
    log.info("Configuring EDD backend for processing")
    log.debug("Configuration string: '{}'".format(config_json))

    yield self.set(config_json)

    cfs = json.dumps(self._config, indent=4)
    log.info("Final configuration:\n" + cfs)

    self.__numa_node_pool = []
    # remove numa nodes with missing capabilities
    for node in numa.getInfo():
        if len(numa.getInfo()[node]['gpus']) < 1:
            log.debug("Not enough gpus on numa node {} - removing from pool.".format(node))
            continue
        elif len(numa.getInfo()[node]['net_devices']) < 1:
            log.debug("Not enough nics on numa node {} - removing from pool.".format(node))
            continue
        else:
            self.__numa_node_pool.append(node)

    log.debug("{} numa nodes remaining in pool after constraints.".format(len(self.__numa_node_pool)))

    if len(self._config['input_data_streams']) > len(self.__numa_node_pool):
        raise FailReply("Not enough numa nodes to process {} polarizations!".format(len(self._config['input_data_streams'])))

    self._subprocessMonitor = SubprocessMonitor()
    # ToDo: Check that all input data streams have the same format, or allow different formats
    for i, streamid in enumerate(self._config['input_data_streams']):
        # calculate input buffer parameters
        stream_description = self._config['input_data_streams'][streamid]
        stream_description["dada_key"] = ["dada", "dadc"][i]
        self.add_input_stream_sensor(streamid)
        self.input_heapSize = stream_description["samples_per_heap"] * stream_description['bit_depth'] / 8
        nHeaps = self._config["samples_per_block"] / stream_description["samples_per_heap"]
        input_bufferSize = nHeaps * self.input_heapSize
        log.info('Input dada parameters created from configuration:\n'
                 '  heap size:       {} byte\n'
                 '  heaps per block: {}\n'
                 '  buffer size:     {} byte'.format(self.input_heapSize, nHeaps, input_bufferSize))

        final_payloads, final_fpss, final_framens = EDD_VDIF_Frame_Size(stream_description['sample_rate'])

        if self._config['payload_size'] == 'auto':
            payload_size = final_payloads[-1]
        else:
            payload_size = int(self._config['payload_size'])

        log.info('Possible frame payload sizes (add 32 for frame size):')
        for k in range(final_payloads.size):
            if payload_size == final_payloads[k]:
                M = "*"
            else:
                M = " "
            log.info('  {}{:5.0f} byte  {:8.0f} frames per sec  {:6.3f} nsec/frame'.format(M, final_payloads[k], final_fpss[k], final_framens[k]))

        if payload_size not in final_payloads:
            log.warning("Payload size {} possibly not conformant with the VDIF format!".format(payload_size))

        # calculate output buffer parameters
        size_of_samples = ceil(1. * self._config["samples_per_block"] * 2 / 8.)  # byte for two bit mode
        number_of_packages = ceil(size_of_samples / float(payload_size))
        output_buffer_size = number_of_packages * (payload_size + self._config['vdif_header_size'])

        integration_time = self._config["samples_per_block"] / float(stream_description["sample_rate"])
        self._integration_time_status.set_value(integration_time)

        # in spead documentation BYTE per second and not bit!
        rate = output_buffer_size / integration_time
rate *= self._config["output_rate_factor"] # set rate to (100+X)% of expected rate self._output_rate_status.set_value(rate / 1E9) log.info('Output parameters calculated from configuration:\n\ total size of data samples: {} byte\n\ number_of_packages: {}\n\ size of output buffer: {} byte\n\ rate ({:.0f}%): {} Gbps'.format(size_of_samples, number_of_packages, output_buffer_size, self._config["output_rate_factor"]*100, rate / 1E9)) numa_node = self.__numa_node_pool[i] log.debug("Associating {} with numa node {}".format(streamid, numa_node)) # configure dada buffer bufferName = stream_description['dada_key'] yield self._create_ring_buffer(input_bufferSize, 64, bufferName, numa_node) ofname = bufferName[::-1] # we write nSlice blocks on each go yield self._create_ring_buffer(output_buffer_size, 8, ofname, numa_node) # Configure + launch physcpu = numa.getInfo()[numa_node]['cores'][0] thread_id = self._config['thread_id'][streamid] station_id = self._config['thread_id'][streamid] cmd = "taskset -c {physcpu} VLBI --input_key={dada_key} --speadheap_size={heapSize} --thread_id={thread_id} --station_id={station_id} --payload_size={payload_size} --sample_rate={sample_rate} --nbits={bit_depth} -o {ofname} --log_level={log_level} --output_type=dada".format(ofname=ofname, heapSize=self.input_heapSize, numa_node=numa_node, physcpu=physcpu, thread_id=thread_id, station_id=station_id, payload_size=payload_size, log_level=self._config['log_level'], **stream_description) log.debug("Command to run: {}".format(cmd)) cudaDevice = numa.getInfo()[numa_node]['gpus'][0] cli = ManagedProcess(cmd, env={"CUDA_VISIBLE_DEVICES": cudaDevice}) self._subprocessMonitor.add(cli, self._subprocess_error) self._subprocesses.append(cli) cfg = self._config.copy() cfg.update(stream_description) ip_range = [] port = set() for key in self._config["output_data_streams"]: if streamid in key: ip_range.append(self._config["output_data_streams"][key]['ip']) port.add(self._config["output_data_streams"][key]['port']) if len(port)!=1: raise FailReply("Output data for one plarization has to be on the same port! ") if self._config["output_type"] == 'network': physcpu = ",".join(numa.getInfo()[numa_node]['cores'][1:2]) fastest_nic, nic_params = numa.getFastestNic(numa_node) log.info("Sending data for {} on NIC {} [ {} ] @ {} Mbit/s".format(streamid, fastest_nic, nic_params['ip'], nic_params['speed'])) cmd = "taskset -c {physcpu} vdif_send --input_key {ofname} --if_ip {ibv_if} --dest_ip {mcast_dest} --port {port_tx} --max_rate {rate}".format(ofname=ofname, physcpu=physcpu, ibv_if=nic_params['ip'], mcast_dest=" ".join(ip_range), port_tx=port.pop(), rate=rate) log.debug("Command to run: {}".format(cmd)) elif self._config["output_type"] == 'disk': ofpath = os.path.join(cfg["output_directory"], ofname) log.debug("Writing output to {}".format(ofpath)) if not os.path.isdir(ofpath): os.makedirs(ofpath) cmd = "dada_dbdisk -k {ofname} -D {ofpath} -W".format(ofname=ofname, ofpath=ofpath, **cfg) else: log.warning("Selected null output. Not sending data!") cmd = "dada_dbnull -z -k {}".format(ofname) log.debug("Command to run: {}".format(cmd)) mks = ManagedProcess(cmd, env={"CUDA_VISIBLE_DEVICES": cudaDevice}) self._subprocessMonitor.add(mks, self._subprocess_error) self._subprocesses.append(mks) self._subprocessMonitor.start()
def configure(self, config_json):
    """
    Configure the EDD gated spectrometer

    Args:
        config_json: A JSON dictionary object containing configuration information
    """
    log.info("Configuring EDD backend for processing")
    log.debug("Configuration string: '{}'".format(config_json))
    yield self.set(config_json)

    cfs = json.dumps(self._config, indent=4)
    log.info("Final configuration:\n" + cfs)

    self.__numa_node_pool = []
    # remove numa nodes with missing capabilities
    for node in numa.getInfo():
        if len(numa.getInfo()[node]['gpus']) < 1:
            log.debug("Not enough gpus on numa node {} - removing from pool.".format(node))
            continue
        elif len(numa.getInfo()[node]['net_devices']) < 1:
            log.debug("Not enough nics on numa node {} - removing from pool.".format(node))
            continue
        else:
            self.__numa_node_pool.append(node)

    log.debug("{} numa nodes remaining in pool after constraints.".format(len(self.__numa_node_pool)))

    if len(self.__numa_node_pool) == 0:
        if self._config['nonfatal_numacheck']:
            log.warning("Not enough numa nodes to process data!")
            self.__numa_node_pool = numa.getInfo().keys()
        else:
            raise FailReply("Not enough numa nodes to process data!")

    self._subprocessMonitor = SubprocessMonitor()

    if len(self._config['input_data_streams']) != 2:
        raise FailReply("Require 2 polarization inputs, got {}".format(len(self._config['input_data_streams'])))

    log.debug("Merging ip ranges")
    self.stream_description = copy.deepcopy(self._config['input_data_streams'].items()[0][1])
    self.stream_description["ip"] += ",{}".format(self._config['input_data_streams'].items()[1][1]["ip"])
    log.debug("Merged ip ranges: {}".format(self.stream_description["ip"]))

    self.input_heapSize = self.stream_description["samples_per_heap"] * self.stream_description['bit_depth'] // 8
    nHeaps = self._config["samples_per_block"] // self.stream_description["samples_per_heap"]
    input_bufferSize = nHeaps * (self.input_heapSize + 64 // 8)
    log.info('Input dada parameters created from configuration:\n'
             '  heap size:       {} byte\n'
             '  heaps per block: {}\n'
             '  buffer size:     {} byte'.format(self.input_heapSize, nHeaps, input_bufferSize))

    # calculate output buffer parameters
    nSlices = max(self._config["samples_per_block"] // 2 // self._config['fft_length'] // self._config['naccumulate'], 1)
    nChannels = self._config['fft_length'] // 2 + 1
    # on / off spectrum + one side channel item per spectrum
    output_bufferSize = nSlices * (8 * (nChannels * 32 // 8 + 2 * 8))
    output_heapSize = nChannels * 32 // 8

    integrationTime = self._config['fft_length'] * self._config['naccumulate'] / float(self.stream_description["sample_rate"])
    self._integration_time_status.set_value(integrationTime)
    # in spead documentation BYTE per second and not bit!
    rate = output_heapSize / integrationTime
    rate *= self._config["output_rate_factor"]  # set rate to (100+X)% of expected rate
    self._output_rate_status.set_value(rate / 1E9)

    log.info('Output parameters calculated from configuration:\n'
             '  spectra per block: {}\n'
             '  nChannels:         {}\n'
             '  buffer size:       {} byte\n'
             '  integrationTime:   {} s\n'
             '  heap size:         {} byte\n'
             '  rate ({:.0f}%):       {} Gbps'.format(
                 nSlices, nChannels, output_bufferSize, integrationTime,
                 output_heapSize, self._config["output_rate_factor"] * 100, rate / 1E9))

    numa_node = self.__numa_node_pool[0]
    log.debug("Associating with numa node {}".format(numa_node))

    # configure dada buffer
    yield self._create_ring_buffer(input_bufferSize, 64, self.__dada_key, numa_node)

    ofname = self.__dada_key[::-1]
    # we write nSlice blocks on each go
    yield self._create_ring_buffer(output_bufferSize, 8 * nSlices, ofname, numa_node)

    # specify all subprocesses
    self.__coreManager = CoreManager(numa_node)
    self.__coreManager.add_task("gated_spectrometer", 1)

    N_inputips = 0
    for p in self.stream_description["ip"].split(','):
        N_inputips += len(ipstring_to_list(p))
    log.debug("Found {} input ips".format(N_inputips))

    if not self._config["dummy_input"]:
        self.__coreManager.add_task("mkrecv", N_inputips + 1, prefere_isolated=True)

    if self._config["output_type"] == "network":
        self.__coreManager.add_task("mksend", 2)

    # Configure + launch
    cmd = "taskset -c {physcpu} gated_spectrometer --nsidechannelitems=1 --input_key={dada_key} --speadheap_size={heapSize} --selected_sidechannel=0 --nbits={bit_depth} --fft_length={fft_length} --naccumulate={naccumulate} -o {ofname} --log_level={log_level} --output_format=Stokes --input_polarizations=Dual --output_type=dada".format(
        dada_key=self.__dada_key, ofname=ofname, heapSize=self.input_heapSize,
        numa_node=numa_node, bit_depth=self.stream_description['bit_depth'],
        physcpu=self.__coreManager.get_coresstr('gated_spectrometer'),
        **self._config)
    log.debug("Command to run: {}".format(cmd))

    cudaDevice = numa.getInfo()[numa_node]['gpus'][0]
    log.debug("Visible CUDA device: {}".format(cudaDevice))
    gated_cli = ManagedProcess(cmd, env={"CUDA_VISIBLE_DEVICES": cudaDevice})
    self._subprocessMonitor.add(gated_cli, self._subprocess_error)
    self._subprocesses.append(gated_cli)

    cfg = self._config.copy()
    cfg.update(self.stream_description)
    cfg["dada_key"] = self.__dada_key

    ip_range = []
    port = set()
    for key in self._config["output_data_streams"]:
        ip_range.append(self._config["output_data_streams"][key]['ip'])
        port.add(self._config["output_data_streams"][key]['port'])
    if len(port) != 1:
        raise FailReply("Output data has to be on the same port!")
") if self._config["output_type"] == 'network': mksend_header_file = tempfile.NamedTemporaryFile(delete=False) mksend_header_file.write(_mksend_header) mksend_header_file.close() nhops = len(ip_range) timestep = cfg["fft_length"] * cfg["naccumulate"] #select network interface fastest_nic, nic_params = numa.getFastestNic(numa_node) heap_id_start = 0 #2 * i # two output spectra per pol log.info("Sending data on NIC {} [ {} ] @ {} Mbit/s".format( fastest_nic, nic_params['ip'], nic_params['speed'])) cmd = "taskset -c {physcpu} mksend --header {mksend_header} --heap-id-start {heap_id_start} --dada-key {ofname} --ibv-if {ibv_if} --port {port_tx} --sync-epoch {sync_time} --sample-clock {sample_rate} --item1-step {timestep} --item4-list {fft_length} --item6-list {sync_time} --item7-list {sample_rate} --item8-list {naccumulate} --rate {rate} --heap-size {heap_size} --nhops {nhops} {mcast_dest}".format( mksend_header=mksend_header_file.name, heap_id_start=heap_id_start, timestep=timestep, ofname=ofname, nChannels=nChannels, physcpu=self.__coreManager.get_coresstr('mksend'), integrationTime=integrationTime, rate=rate, nhops=nhops, heap_size=output_heapSize, ibv_if=nic_params['ip'], mcast_dest=" ".join(ip_range), port_tx=port.pop(), **cfg) log.debug("Command to run: {}".format(cmd)) elif self._config["output_type"] == 'disk': ofpath = os.path.join(cfg["output_directory"], ofname) log.debug("Writing output to {}".format(ofpath)) if not os.path.isdir(ofpath): os.makedirs(ofpath) cmd = "dada_dbdisk -k {ofname} -D {ofpath} -W".format( ofname=ofname, ofpath=ofpath, **cfg) else: log.warning("Selected null output. Not sending data!") cmd = "dada_dbnull -z -k {}".format(ofname) mks = ManagedProcess(cmd, env={"CUDA_VISIBLE_DEVICES": cudaDevice}) self._subprocessMonitor.add(mks, self._subprocess_error) self._subprocesses.append(mks) self._subprocessMonitor.start()
def test_use_non_isolated_cores(self):
    self.cm.add_task('T1', len(numa.getInfo()['0']['isolated_cores']) + 1,
                     prefere_isolated=True)
    self.cm.get_cores('T1')
def test_too_many_isolated_cores(self):
    self.cm.add_task('T1', len(numa.getInfo()['0']['isolated_cores']) + 1,
                     require_isolated=True)
    with self.assertRaises(Exception) as context:
        self.cm.get_cores('T1')
def test_too_many_cores(self):
    self.cm.add_task('T1', len(numa.getInfo()['0']['cores'])
                     + len(numa.getInfo()['0']['isolated_cores']) + 1)
    self.assertEqual(len(self.cm.get_cores('T1')),
                     len(numa.getInfo()['0']['cores']))
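# These tests assume a fixture along the following lines (hedged: not part of
# this excerpt; CoreManager is constructed with a numa node id, as in the
# gated spectrometer configure() above):
#
#   class TestCoreManager(unittest.TestCase):
#       def setUp(self):
#           self.cm = CoreManager('0')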